Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SparkSQL: Update terminator grammar for HAVING, WHERE, GROUP BY #3526

Merged
Merged 7 commits on Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 18 additions & 7 deletions src/sqlfluff/dialects/dialect_ansi.py
Expand Up @@ -490,6 +490,19 @@
"WINDOW",
"OVERLAPS",
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"HAVING",
"QUALIFY",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"QUALIFY",
"WINDOW",
),
OrderByClauseTerminators=OneOf(
"LIMIT",
"HAVING",
Expand Down Expand Up @@ -1992,13 +2005,13 @@ class GroupByClauseSegment(BaseSegment):
"""A `GROUP BY` clause like in `SELECT`."""

type = "groupby_clause"

match_grammar: Matchable = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf(
Sequence("ORDER", "BY"), "LIMIT", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)

parse_grammar: Optional[Matchable] = Sequence(
"GROUP",
"BY",
Expand All @@ -2011,9 +2024,7 @@ class GroupByClauseSegment(BaseSegment):
# Can `GROUP BY coalesce(col, 1)`
Ref("ExpressionSegment"),
),
terminator=OneOf(
Sequence("ORDER", "BY"), "LIMIT", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
),
Dedent,
)
Expand All @@ -2025,7 +2036,7 @@ class HavingClauseSegment(BaseSegment):
type = "having_clause"
match_grammar: Matchable = StartsWith(
"HAVING",
terminator=OneOf(Sequence("ORDER", "BY"), "LIMIT", "QUALIFY", "WINDOW"),
terminator=Ref("HavingClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)
parse_grammar: Optional[Matchable] = Sequence(
Expand Down
54 changes: 24 additions & 30 deletions src/sqlfluff/dialects/dialect_hive.py
Expand Up @@ -204,6 +204,30 @@
],
before=Sequence("ORDER", "BY"),
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence(
OneOf("ORDER", "CLUSTER", "DISTRIBUTE", "SORT"),
"BY",
),
"LIMIT",
"HAVING",
"QUALIFY",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"CLUSTER",
"DISTRIBUTE",
"SORT",
),
"BY",
),
"LIMIT",
"QUALIFY",
"WINDOW",
),
)


Expand Down Expand Up @@ -838,36 +862,6 @@ class SelectClauseSegment(ansi.SelectClauseSegment):
parse_grammar = ansi.SelectClauseSegment.parse_grammar.copy()


class GroupByClauseSegment(ansi.GroupByClauseSegment):
"""Overriding GroupByClauseSegment to allow for additional segment parsing."""

match_grammar = ansi.GroupByClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
],
before=Ref.keyword("LIMIT"),
)
parse_grammar = ansi.GroupByClauseSegment.parse_grammar


class HavingClauseSegment(ansi.HavingClauseSegment):
"""Overriding HavingClauseSegment to allow for additional segment parsing."""

match_grammar = ansi.HavingClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
],
before=Ref.keyword("LIMIT"),
)
parse_grammar = ansi.HavingClauseSegment.parse_grammar


class SetExpressionSegment(ansi.SetExpressionSegment):
"""Overriding SetExpressionSegment to allow for additional segment parsing."""

Expand Down
31 changes: 15 additions & 16 deletions src/sqlfluff/dialects/dialect_snowflake.py
Expand Up @@ -4,6 +4,7 @@

Based on https://docs.snowflake.com/en/sql-reference-commands.html
"""
from typing import Optional

from sqlfluff.core.dialects import load_raw_dialect
from sqlfluff.core.parser import (
Expand Down Expand Up @@ -645,6 +646,17 @@
"MEASURES",
),
TrimParametersGrammar=Nothing(),
GroupByClauseTerminatorGrammar=OneOf(
"ORDER", "LIMIT", "FETCH", "OFFSET", "HAVING", "QUALIFY", "WINDOW"
),
HavingClauseTerminatorGrammar=OneOf(
Sequence("ORDER", "BY"),
"LIMIT",
"QUALIFY",
"WINDOW",
"FETCH",
"OFFSET",
),
)

# Add all Snowflake keywords
Expand Down Expand Up @@ -828,14 +840,12 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):
https://docs.snowflake.com/en/sql-reference/constructs/group-by.html
"""

match_grammar = StartsWith(
match_grammar: Matchable = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf(
"ORDER", "LIMIT", "FETCH", "OFFSET", "HAVING", "QUALIFY", "WINDOW"
),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)
parse_grammar = Sequence(
parse_grammar: Optional[Matchable] = Sequence(
"GROUP",
"BY",
Indent,
Expand Down Expand Up @@ -5189,17 +5199,6 @@ class OrderByClauseSegment(ansi.OrderByClauseSegment):
)


class HavingClauseSegment(ansi.HavingClauseSegment):
"""A `HAVING` clause."""

type = "having_clause"
match_grammar = ansi.HavingClauseSegment.match_grammar.copy()
match_grammar.terminator = match_grammar.terminator.copy( # type: ignore
insert=[Ref.keyword("FETCH"), Ref.keyword("OFFSET")],
)
parse_grammar = ansi.HavingClauseSegment.parse_grammar


class DropProcedureStatementSegment(BaseSegment):
"""A snowflake `DROP PROCEDURE ...` statement.

Expand Down
51 changes: 49 additions & 2 deletions src/sqlfluff/dialects/dialect_sparksql.py
Expand Up @@ -266,6 +266,53 @@
Ref("SingleQuotedIdentifierSegment"),
Ref("BackQuotedIdentifierSegment"),
),
WhereClauseTerminatorGrammar=OneOf(
"LIMIT",
Sequence(
OneOf(
"CLUSTER",
"DISTRIBUTE",
"GROUP",
"ORDER",
"SORT",
),
"BY",
),
Sequence("ORDER", "BY"),
Sequence("DISTRIBUTE", "BY"),
"HAVING",
"QUALIFY",
"WINDOW",
"OVERLAPS",
),
GroupByClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"DISTRIBUTE",
"CLUSTER",
"SORT",
),
"BY",
),
"LIMIT",
"HAVING",
"WINDOW",
),
HavingClauseTerminatorGrammar=OneOf(
Sequence(
OneOf(
"ORDER",
"CLUSTER",
"DISTRIBUTE",
"SORT",
),
"BY",
),
"LIMIT",
"QUALIFY",
"WINDOW",
),
)

sparksql_dialect.add(
Expand Down Expand Up @@ -1425,7 +1472,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):

match_grammar = StartsWith(
Sequence("GROUP", "BY"),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
enforce_whitespace_preceding_terminator=True,
)

Expand All @@ -1443,7 +1490,7 @@ class GroupByClauseSegment(ansi.GroupByClauseSegment):
Ref("CubeRollupClauseSegment"),
Ref("GroupingSetsClauseSegment"),
),
terminator=OneOf("ORDER", "LIMIT", "HAVING", "WINDOW"),
terminator=Ref("GroupByClauseTerminatorGrammar"),
),
# TODO: New Rule
# Warn if CubeRollupClauseSegment and
Expand Down
10 changes: 10 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.sql
@@ -0,0 +1,10 @@
-- https://github.com/sqlfluff/sqlfluff/issues/3484
WITH cte AS (
SELECT *
FROM source
WHERE col1 = 0
DISTRIBUTE BY col1
),

SELECT *
FROM cte
58 changes: 58 additions & 0 deletions test/fixtures/dialects/sparksql/issue_3484.yml
@@ -0,0 +1,58 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 7f337f5742ac96fa2fb84a92a52d4c994f73d350be13d6e645d59cbe945af2c4
file:
statement:
with_compound_statement:
keyword: WITH
common_table_expression:
identifier: cte
keyword: AS
bracketed:
start_bracket: (
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: source
where_clause:
keyword: WHERE
expression:
column_reference:
identifier: col1
comparison_operator:
raw_comparison_operator: '='
literal: '0'
distribute_by_clause:
- keyword: DISTRIBUTE
- keyword: BY
- column_reference:
identifier: col1
end_bracket: )
comma: ','
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: cte
22 changes: 22 additions & 0 deletions test/fixtures/dialects/sparksql/select_cluster_by.sql
Expand Up @@ -30,3 +30,25 @@ SELECT
FROM person
CLUSTER BY
LEFT(SUBSTRING_INDEX(name, ' ', -1), 1);

SELECT
age,
name
FROM person
WHERE age <= 100
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
CLUSTER BY age;

SELECT
age,
name
FROM person
GROUP BY age
HAVING COUNT(age) > 1
CLUSTER BY age;