Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SparkSQL: Update terminator grammar for HAVING, WHERE, GROUP BY #3526

Merged
Merged 7 commits on Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 18 additions & 7 deletions src/sqlfluff/dialects/dialect_ansi.py
Expand Up @@ -490,6 +490,19 @@
"WINDOW",
"OVERLAPS",
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"HAVING",
"QUALIFY",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"QUALIFY",
"WINDOW",
),
OrderByClauseTerminators=OneOf(
"LIMIT",
"HAVING",
Expand Down Expand Up @@ -1992,13 +2005,13 @@ class GroupByClauseSegment(BaseSegment):
"""A `GROUP BY` clause like in `SELECT`."""

type = "groupby_clause"

match_grammar: Matchable = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf(
Sequence("ORDER", "BY"), "LIMIT", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)

parse_grammar: Optional[Matchable] = Sequence(
"GROUP",
"BY",
Expand All @@ -2011,9 +2024,7 @@ class GroupByClauseSegment(BaseSegment):
# Can `GROUP BY coalesce(col, 1)`
Ref("ExpressionSegment"),
),
terminator=OneOf(
Sequence("ORDER", "BY"), "LIMIT", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
),
Dedent,
)
Expand All @@ -2025,7 +2036,7 @@ class HavingClauseSegment(BaseSegment):
type = "having_clause"
match_grammar: Matchable = StartsWith(
"HAVING",
terminator=OneOf(Sequence("ORDER", "BY"), "LIMIT", "QUALIFY", "WINDOW"),
terminator=Ref("HavingClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)
parse_grammar: Optional[Matchable] = Sequence(
Expand Down
54 changes: 24 additions & 30 deletions src/sqlfluff/dialects/dialect_hive.py
Expand Up @@ -204,6 +204,30 @@
],
before=Sequence("ORDER", "BY"),
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence(
OneOf("ORDER", "CLUSTER", "DISTRIBUTE", "SORT"),
"BY",
),
"LIMIT",
"HAVING",
"QUALIFY",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"CLUSTER",
"DISTRIBUTE",
"SORT",
),
"BY",
),
"LIMIT",
"QUALIFY",
"WINDOW",
),
)


Expand Down Expand Up @@ -838,36 +862,6 @@ class SelectClauseSegment(ansi.SelectClauseSegment):
parse_grammar = ansi.SelectClauseSegment.parse_grammar.copy()


class GroupByClauseSegment(ansi.GroupByClauseSegment):
"""Overriding GroupByClauseSegment to allow for additional segment parsing."""

match_grammar = ansi.GroupByClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
],
before=Ref.keyword("LIMIT"),
)
parse_grammar = ansi.GroupByClauseSegment.parse_grammar


class HavingClauseSegment(ansi.HavingClauseSegment):
"""Overriding HavingClauseSegment to allow for additional segment parsing."""

match_grammar = ansi.HavingClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
],
before=Ref.keyword("LIMIT"),
)
parse_grammar = ansi.HavingClauseSegment.parse_grammar


class SetExpressionSegment(ansi.SetExpressionSegment):
"""Overriding SetExpressionSegment to allow for additional segment parsing."""

Expand Down
31 changes: 15 additions & 16 deletions src/sqlfluff/dialects/dialect_snowflake.py
Expand Up @@ -4,6 +4,7 @@

Based on https://docs.snowflake.com/en/sql-reference-commands.html
"""
from typing import Optional

from sqlfluff.core.dialects import load_raw_dialect
from sqlfluff.core.parser import (
Expand Down Expand Up @@ -645,6 +646,17 @@
"MEASURES",
),
TrimParametersGrammar=Nothing(),
GroupByClauseTerminatorGrammar=OneOf(
"ORDER", "LIMIT", "FETCH", "OFFSET", "HAVING", "QUALIFY", "WINDOW"
),
HavingClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"QUALIFY",
"WINDOW",
"FETCH",
"OFFSET",
),
)

# Add all Snowflake keywords
Expand Down Expand Up @@ -828,14 +840,12 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):
https://docs.snowflake.com/en/sql-reference/constructs/group-by.html
"""

match_grammar = StartsWith(
match_grammar: Matchable = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf(
"ORDER", "LIMIT", "FETCH", "OFFSET", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)
parse_grammar = Sequence(
parse_grammar: Optional[Matchable] = Sequence(
"GROUP",
"BY",
Indent,
Expand Down Expand Up @@ -5189,17 +5199,6 @@ class OrderByClauseSegment(ansi.OrderByClauseSegment):
)


class HavingClauseSegment(ansi.HavingClauseSegment):
"""A `HAVING` clause."""

type = "having_clause"
match_grammar = ansi.HavingClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[Ref.keyword("FETCH"), Ref.keyword("OFFSET")],
)
parse_grammar = ansi.HavingClauseSegment.parse_grammar


class DropProcedureStatementSegment(BaseSegment):
"""A snowflake `DROP PROCEDURE ...` statement.

Expand Down
51 changes: 49 additions & 2 deletions src/sqlfluff/dialects/dialect_sparksql.py
Expand Up @@ -266,6 +266,53 @@
Ref("SingleQuotedIdentifierSegment"),
Ref("BackQuotedIdentifierSegment"),
),
WhereClauseTerminatorGrammar=OneOf(
"LIMIT",
Sequence(
OneOf(
"CLUSTER",
"DISTRIBUTE",
"GROUP",
"ORDER",
"SORT",
),
"BY",
),
Sequence("ORDER", "BY"),
Sequence("DISTRIBUTE", "BY"),
"HAVING",
"QUALIFY",
"WINDOW",
"OVERLAPS",
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"DISTRIBUTE",
"CLUSTER",
"SORT",
),
"BY",
),
"LIMIT",
"HAVING",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"CLUSTER",
"DISTRIBUTE",
"SORT",
),
"BY",
),
"LIMIT",
"QUALIFY",
"WINDOW",
),
)

sparksql_dialect.add(
Expand Down Expand Up @@ -1425,7 +1472,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):

match_grammar = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)

Expand All @@ -1443,7 +1490,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):
Ref("CubeRollupClauseSegment"),
Ref("GroupingSetsClauseSegment"),
),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
),
# TODO: New Rule
# Warn if CubeRollupClauseSegment and
Expand Down
10 changes: 10 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.sql
@@ -0,0 +1,10 @@
-- https://github.com/sqlfluff/sqlfluff/issues/3484
WITH cte AS (
SELECT *
FROM source
WHERE col1 = 0
DISTRIBUTE BY col1
),

SELECT *
FROM cte
58 changes: 58 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.yml
@@ -0,0 +1,58 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 7f337f5742ac96fa2fb84a92a52d4c994f73d350be13d6e645d59cbe945af2c4
file:
statement:
with_compound_statement:
keyword: WITH
common_table_expression:
identifier: cte
keyword: AS
bracketed:
start_bracket: (
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: source
where_clause:
keyword: WHERE
expression:
column_reference:
identifier: col1
comparison_operator:
raw_comparison_operator: '='
literal: '0'
distribute_by_clause:
- keyword: DISTRIBUTE
- keyword: BY
- column_reference:
identifier: col1
end_bracket: )
comma: ','
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: cte
22 changes: 22 additions & 0 deletions test/fixtures/dialects/sparksql/select_cluster_by.sql
Expand Up @@ -30,3 +30,25 @@ SELECT
FROM person
CLUSTER BY
LEFT(SUBSTRING_INDEX(name, ' ', -1), 1);

SELECT
age,
name
FROM person
WHERE age <= 100
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
HAVING COUNT(age) > 1
CLUSTER BY age;