diff --git a/src/sqlfluff/dialects/dialect_spark3.py b/src/sqlfluff/dialects/dialect_spark3.py
index 24ad3817604..afb3c5617da 100644
--- a/src/sqlfluff/dialects/dialect_spark3.py
+++ b/src/sqlfluff/dialects/dialect_spark3.py
@@ -15,8 +15,11 @@
     AnyNumberOf,
     BaseSegment,
     Bracketed,
-    Delimited,
     CommentSegment,
+    Conditional,
+    Dedent,
+    Delimited,
+    Indent,
     NamedParser,
     OneOf,
     OptionallyBracketed,
@@ -693,3 +696,78 @@ class StatementSegment(BaseSegment):
             Ref("DropModelStatementSegment"),
         ],
     )
+
+
+@spark3_dialect.segment(replace=True)
+class JoinClauseSegment(BaseSegment):
+    """Any number of join clauses, including the `JOIN` keyword.
+
+    https://spark.apache.org/docs/3.0.0/sql-ref-syntax-qry-select-join.html
+    TODO: Add NATURAL JOIN syntax.
+    """
+
+    type = "join_clause"
+    match_grammar = Sequence(
+        # NB: Qualifier is optional. Bare SEMI/ANTI currently parse as an alias.
+        # TODO: Allow nested joins like:
+        # ....FROM S1.T1 t1 LEFT JOIN ( S2.T2 t2 JOIN S3.T3 t3 ON t2.col1=t3.col1) ON t1.col1 = t2.col1
+        OneOf(
+            "CROSS",
+            "INNER",
+            Sequence(
+                OneOf(
+                    "FULL",
+                    "LEFT",
+                    "RIGHT",
+                ),
+                Ref.keyword("OUTER", optional=True),
+            ),
+            Sequence(
+                Ref.keyword("LEFT", optional=True),
+                "SEMI",
+            ),
+            Sequence(
+                Ref.keyword("LEFT", optional=True),
+                "ANTI",
+            ),
+            optional=True,
+        ),
+        Ref("JoinKeywords"),
+        Indent,
+        Sequence(
+            Ref("FromExpressionElementSegment"),
+            Conditional(Dedent, indented_using_on=False),
+            # NB: The join condition (ON or USING) is optional.
+            OneOf(
+                # ON clause
+                Ref("JoinOnConditionSegment"),
+                # USING clause
+                Sequence(
+                    "USING",
+                    Indent,
+                    Bracketed(
+                        # NB: We don't use BracketedColumnReferenceListGrammar
+                        # here because we're just using SingleIdentifierGrammar,
+                        # rather than ObjectReferenceSegment or ColumnReferenceSegment.
+                        # This is a) so that we don't lint it as a reference and
+                        # b) because the column will probably be returned anyway
+                        # during parsing.
+                        Delimited(
+                            Ref("SingleIdentifierGrammar"),
+                            ephemeral_name="UsingClauseContents",
+                        )
+                    ),
+                    Dedent,
+                ),
+                # Unqualified joins *are* allowed. They just might not
+                # be a good idea.
+                optional=True,
+            ),
+            Conditional(Indent, indented_using_on=False),
+        ),
+        Dedent,
+    )
+
+    get_eventual_alias = ansi_dialect.get_segment(
+        "JoinClauseSegment"
+    ).get_eventual_alias
diff --git a/test/fixtures/dialects/spark3/join_types.sql b/test/fixtures/dialects/spark3/join_types.sql
new file mode 100644
index 00000000000..c3174d9d9c7
--- /dev/null
+++ b/test/fixtures/dialects/spark3/join_types.sql
@@ -0,0 +1,4 @@
+SELECT * FROM employee SEMI JOIN department ON employee.deptno = department.deptno;
+SELECT * FROM employee ANTI JOIN department ON employee.deptno = department.deptno;
+SELECT * FROM employee LEFT SEMI JOIN department ON employee.deptno = department.deptno;
+SELECT * FROM employee LEFT ANTI JOIN department ON employee.deptno = department.deptno;
diff --git a/test/fixtures/dialects/spark3/join_types.yml b/test/fixtures/dialects/spark3/join_types.yml
new file mode 100644
index 00000000000..b41eef150fa
--- /dev/null
+++ b/test/fixtures/dialects/spark3/join_types.yml
@@ -0,0 +1,151 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py` to generate them after adding or
+# altering SQL files.
+_hash: 083b7cabc2d0da7873019cf780c8a8e0046fd187223d5da951159dbaf4d03ca4
+file:
+- base:
+    select_statement:
+      select_clause:
+        keyword: SELECT
+        select_clause_element:
+          wildcard_expression:
+            wildcard_identifier:
+              star: '*'
+      from_clause:
+        keyword: FROM
+        from_expression:
+          from_expression_element:
+            table_expression:
+              table_reference:
+                identifier: employee
+            alias_expression:
+              identifier: SEMI
+          join_clause:
+            keyword: JOIN
+            from_expression_element:
+              table_expression:
+                table_reference:
+                  identifier: department
+            join_on_condition:
+              keyword: 'ON'
+              expression:
+              - column_reference:
+                - identifier: employee
+                - dot: .
+                - identifier: deptno
+              - comparison_operator: '='
+              - column_reference:
+                - identifier: department
+                - dot: .
+                - identifier: deptno
+- statement_terminator: ;
+- base:
+    select_statement:
+      select_clause:
+        keyword: SELECT
+        select_clause_element:
+          wildcard_expression:
+            wildcard_identifier:
+              star: '*'
+      from_clause:
+        keyword: FROM
+        from_expression:
+          from_expression_element:
+            table_expression:
+              table_reference:
+                identifier: employee
+            alias_expression:
+              identifier: ANTI
+          join_clause:
+            keyword: JOIN
+            from_expression_element:
+              table_expression:
+                table_reference:
+                  identifier: department
+            join_on_condition:
+              keyword: 'ON'
+              expression:
+              - column_reference:
+                - identifier: employee
+                - dot: .
+                - identifier: deptno
+              - comparison_operator: '='
+              - column_reference:
+                - identifier: department
+                - dot: .
+                - identifier: deptno
+- statement_terminator: ;
+- base:
+    select_statement:
+      select_clause:
+        keyword: SELECT
+        select_clause_element:
+          wildcard_expression:
+            wildcard_identifier:
+              star: '*'
+      from_clause:
+        keyword: FROM
+        from_expression:
+          from_expression_element:
+            table_expression:
+              table_reference:
+                identifier: employee
+          join_clause:
+          - keyword: LEFT
+          - keyword: SEMI
+          - keyword: JOIN
+          - from_expression_element:
+              table_expression:
+                table_reference:
+                  identifier: department
+          - join_on_condition:
+              keyword: 'ON'
+              expression:
+              - column_reference:
+                - identifier: employee
+                - dot: .
+                - identifier: deptno
+              - comparison_operator: '='
+              - column_reference:
+                - identifier: department
+                - dot: .
+                - identifier: deptno
+- statement_terminator: ;
+- base:
+    select_statement:
+      select_clause:
+        keyword: SELECT
+        select_clause_element:
+          wildcard_expression:
+            wildcard_identifier:
+              star: '*'
+      from_clause:
+        keyword: FROM
+        from_expression:
+          from_expression_element:
+            table_expression:
+              table_reference:
+                identifier: employee
+          join_clause:
+          - keyword: LEFT
+          - keyword: ANTI
+          - keyword: JOIN
+          - from_expression_element:
+              table_expression:
+                table_reference:
+                  identifier: department
+          - join_on_condition:
+              keyword: 'ON'
+              expression:
+              - column_reference:
+                - identifier: employee
+                - dot: .
+                - identifier: deptno
+              - comparison_operator: '='
+              - column_reference:
+                - identifier: department
+                - dot: .
+                - identifier: deptno
+- statement_terminator: ;