Skip to content

Commit

Permalink
feat(clickhouse): Parse window functions in ParameterizedAggFuncs (#3347)
Browse files Browse the repository at this point in the history

* feat(clickhouse): Parse window functions in ParameterizedAggFuncs

* Fix CombinedAggFunc & AnonymousAggFunc post-window parsing

* Merge is_combined branches

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>

* Add expr based tests

* Update tests/dialects/test_clickhouse.py

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>

* Break up branches

* Further refactor parse_function

* One more refactor

* typo

---------

Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com>
  • Loading branch information
VaggelisD and georgesittas committed Apr 25, 2024
1 parent fb9a7ad commit ce7d893
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 33 deletions.
77 changes: 44 additions & 33 deletions sqlglot/dialects/clickhouse.py
Expand Up @@ -254,6 +254,14 @@ class Parser(parser.Parser):
"sparkBar",
"sumCount",
"largestTriangleThreeBuckets",
"histogram",
"sequenceMatch",
"sequenceCount",
"windowFunnel",
"retention",
"uniqUpTo",
"sequenceNextNode",
"exponentialTimeDecayedAvg",
}

AGG_FUNCTIONS_SUFFIXES = [
Expand Down Expand Up @@ -463,48 +471,51 @@ def _parse_function(
optional_parens: bool = True,
any_token: bool = False,
) -> t.Optional[exp.Expression]:
func = super()._parse_function(
expr = super()._parse_function(
functions=functions,
anonymous=anonymous,
optional_parens=optional_parens,
any_token=any_token,
)

if isinstance(func, exp.Anonymous):
parts = self.AGG_FUNC_MAPPING.get(func.this)
func = expr.this if isinstance(expr, exp.Window) else expr

# Aggregate functions can be split in 2 parts: <func_name><suffix>
parts = (
self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
)

if parts:
params = self._parse_func_params(func)

kwargs = {
"this": func.this,
"expressions": func.expressions,
}
if parts[1]:
kwargs["parts"] = parts
exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
else:
exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

kwargs["exp_class"] = exp_class
if params:
if parts and parts[1]:
return self.expression(
exp.CombinedParameterizedAgg,
this=func.this,
expressions=func.expressions,
params=params,
parts=parts,
)
return self.expression(
exp.ParameterizedAgg,
this=func.this,
expressions=func.expressions,
params=params,
)

if parts:
if parts[1]:
return self.expression(
exp.CombinedAggFunc,
this=func.this,
expressions=func.expressions,
parts=parts,
)
return self.expression(
exp.AnonymousAggFunc,
this=func.this,
expressions=func.expressions,
)

return func
kwargs["params"] = params

func = self.expression(**kwargs)

if isinstance(expr, exp.Window):
# The window's func was parsed as Anonymous in base parser, fix its
# type to be CH style CombinedAggFunc / AnonymousAggFunc
expr.set("this", func)
elif params:
# Params have blocked super()._parse_function() from parsing the following window
# (if that exists) as they're standing between the function call and the window spec
expr = self._parse_window(func)
else:
expr = func

return expr

def _parse_func_params(
self, this: t.Optional[exp.Func] = None
Expand Down
28 changes: 28 additions & 0 deletions tests/dialects/test_clickhouse.py
Expand Up @@ -93,6 +93,9 @@ def test_clickhouse(self):
self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""")
self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b")
self.validate_identity("SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b")
self.validate_identity(
"SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table"
)

self.validate_identity(
"SELECT $1$foo$1$",
Expand Down Expand Up @@ -826,3 +829,28 @@ def test_ddl(self):
self.validate_identity(
"CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
)

def test_agg_functions(self):
    """Check the expression class produced for windowed ClickHouse aggregates.

    Each query applies an aggregate call (plain or with an ``If`` suffix,
    with or without a second parenthesised argument list) followed by an
    ``OVER`` clause, and asserts the type of the node found under the
    first select expression.
    """

    def extract_agg_func(query):
        # Parse with the ClickHouse dialect and return the `this` of the
        # first select expression.
        parsed = parse_one(query, read="clickhouse")
        return parsed.selects[0].this

    # (query, expected expression class) pairs, checked in order.
    cases = (
        (
            "select quantileGK(100, 0.95) OVER (PARTITION BY id) FROM table",
            exp.AnonymousAggFunc,
        ),
        (
            "select quantileGK(100, 0.95)(reading) OVER (PARTITION BY id) FROM table",
            exp.ParameterizedAgg,
        ),
        (
            "select quantileGKIf(100, 0.95) OVER (PARTITION BY id) FROM table",
            exp.CombinedAggFunc,
        ),
        (
            "select quantileGKIf(100, 0.95)(reading) OVER (PARTITION BY id) FROM table",
            exp.CombinedParameterizedAgg,
        ),
    )

    for sql, expected_type in cases:
        self.assertIsInstance(extract_agg_func(sql), expected_type)

0 comments on commit ce7d893

Please sign in to comment.