Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SparkSQL: Update terminator grammar for HAVING, WHERE, GROUP BY #3526

Merged
merged 7 commits into from Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 51 additions & 2 deletions src/sqlfluff/dialects/dialect_sparksql.py
Expand Up @@ -265,6 +265,25 @@
Ref("SingleQuotedIdentifierSegment"),
Ref("BackQuotedIdentifierSegment"),
),
WhereClauseTerminatorGrammar=OneOf(
"LIMIT",
Sequence(
OneOf(
"CLUSTER",
"DISTRIBUTE",
"GROUP",
"ORDER",
"SORT",
),
"BY",
),
Sequence("ORDER", "BY"),
Sequence("DISTRIBUTE", "BY"),
"HAVING",
"QUALIFY",
"WINDOW",
"OVERLAPS",
),
)

sparksql_dialect.add(
Expand Down Expand Up @@ -540,6 +559,20 @@
type="literal",
),
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"DISTRIBUTE",
"CLUSTER",
"SORT",
),
"BY",
),
"LIMIT",
"HAVING",
"WINDOW",
),
R7L208 marked this conversation as resolved.
Show resolved Hide resolved
)

# Adding Hint related grammar before comment `block_comment` and
Expand Down Expand Up @@ -1424,7 +1457,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):

match_grammar = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)

Expand All @@ -1442,7 +1475,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):
Ref("CubeRollupClauseSegment"),
Ref("GroupingSetsClauseSegment"),
),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
),
# TODO: New Rule
# Warn if CubeRollupClauseSegment and
Expand Down Expand Up @@ -2762,3 +2795,19 @@ class RestoreTableStatementSegment(BaseSegment):
Ref("VersionAsOfGrammar"),
),
)


class HavingClauseSegment(ansi.HavingClauseSegment):
    """A `HAVING` clause.

    Extends the ANSI ``HAVING`` clause so that the Spark-specific
    ``CLUSTER BY``, ``DISTRIBUTE BY`` and ``SORT BY`` clauses also
    terminate it, allowing statements such as
    ``... HAVING COUNT(age) > 1 CLUSTER BY age`` to parse.
    """

    type = "having_clause"
    # Copy the ANSI match grammar rather than mutating the grammar object
    # shared with the base dialect.
    match_grammar = ansi.HavingClauseSegment.match_grammar.copy()
    # Insert the extra Spark terminators into the copied terminator grammar.
    # NOTE(review): assumes `.copy(insert=...)` appends alternatives without
    # replacing the existing ones — confirm against the grammar API.
    match_grammar.terminator = match_grammar.terminator.copy(  # type: ignore
        insert=[
            Sequence(
                OneOf("CLUSTER", "DISTRIBUTE", "SORT"),
                "BY",
            ),
        ],
    )
    # Internal parsing of the clause itself is unchanged from ANSI; only the
    # termination behaviour differs.
    parse_grammar = ansi.HavingClauseSegment.parse_grammar
10 changes: 10 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.sql
@@ -0,0 +1,10 @@
-- https://github.com/sqlfluff/sqlfluff/issues/3484
WITH cte AS (
SELECT *
FROM source
WHERE col1 = 0
DISTRIBUTE BY col1
),

SELECT *
FROM cte
58 changes: 58 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.yml
@@ -0,0 +1,58 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 7f337f5742ac96fa2fb84a92a52d4c994f73d350be13d6e645d59cbe945af2c4
file:
statement:
with_compound_statement:
keyword: WITH
common_table_expression:
identifier: cte
keyword: AS
bracketed:
start_bracket: (
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: source
where_clause:
keyword: WHERE
expression:
column_reference:
identifier: col1
comparison_operator:
raw_comparison_operator: '='
literal: '0'
distribute_by_clause:
- keyword: DISTRIBUTE
- keyword: BY
- column_reference:
identifier: col1
end_bracket: )
comma: ','
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: cte
22 changes: 22 additions & 0 deletions test/fixtures/dialects/sparksql/select_cluster_by.sql
Expand Up @@ -30,3 +30,25 @@ SELECT
FROM person
CLUSTER BY
LEFT(SUBSTRING_INDEX(name, ' ', -1), 1);

SELECT
age,
name
FROM person
WHERE age <= 100
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
HAVING COUNT(age) > 1
CLUSTER BY age;
108 changes: 107 additions & 1 deletion test/fixtures/dialects/sparksql/select_cluster_by.yml
Expand Up @@ -3,7 +3,7 @@
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: ff198ba535c84b801c569c815830e92738a537853f207ad77facffe3412aa277
_hash: 62fd488eda564c95d2b693d96671c83f6ebcd6f2966999ea091f135b985f96d2
file:
- statement:
select_statement:
Expand Down Expand Up @@ -129,3 +129,109 @@ file:
literal: '1'
- end_bracket: )
- statement_terminator: ;
- statement:
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
identifier: age
- comma: ','
- select_clause_element:
column_reference:
identifier: name
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: person
where_clause:
keyword: WHERE
expression:
column_reference:
identifier: age
comparison_operator:
- raw_comparison_operator: <
- raw_comparison_operator: '='
literal: '100'
cluster_by_clause:
- keyword: CLUSTER
- keyword: BY
- column_reference:
identifier: age
- statement_terminator: ;
- statement:
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
identifier: age
- comma: ','
- select_clause_element:
column_reference:
identifier: name
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: person
groupby_clause:
- keyword: GROUP
- keyword: BY
- column_reference:
identifier: age
cluster_by_clause:
- keyword: CLUSTER
- keyword: BY
- column_reference:
identifier: age
- statement_terminator: ;
- statement:
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
identifier: age
- comma: ','
- select_clause_element:
column_reference:
identifier: name
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: person
groupby_clause:
- keyword: GROUP
- keyword: BY
- column_reference:
identifier: age
having_clause:
keyword: HAVING
expression:
function:
function_name:
function_name_identifier: COUNT
bracketed:
start_bracket: (
expression:
column_reference:
identifier: age
end_bracket: )
comparison_operator:
raw_comparison_operator: '>'
literal: '1'
cluster_by_clause:
- keyword: CLUSTER
- keyword: BY
- column_reference:
identifier: age
- statement_terminator: ;
22 changes: 22 additions & 0 deletions test/fixtures/dialects/sparksql/select_distribute_by.sql
Expand Up @@ -28,3 +28,25 @@ SELECT
FROM person
DISTRIBUTE BY
LEFT(SUBSTRING_INDEX(name, ' ', -1), 1);

SELECT
age,
name
FROM person
WHERE age <= 100
DISTRIBUTE BY age;

SELECT
age,
name
FROM person
GROUP BY age
DISTRIBUTE BY age;

SELECT
age,
name
FROM person
GROUP BY age
HAVING COUNT(age) > 1
DISTRIBUTE BY age;