Skip to content

Commit

Permalink
Spark3 join types (#1942)
Browse files Browse the repository at this point in the history
* Add Spark3 join types

* black format and linting

* replace get_eventual_alias with method from ansi
  • Loading branch information
jpy-git committed Nov 19, 2021
1 parent d3c40ff commit 2d88f8a
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 1 deletion.
80 changes: 79 additions & 1 deletion src/sqlfluff/dialects/dialect_spark3.py
Expand Up @@ -15,8 +15,11 @@
AnyNumberOf,
BaseSegment,
Bracketed,
Delimited,
CommentSegment,
Conditional,
Dedent,
Delimited,
Indent,
NamedParser,
OneOf,
OptionallyBracketed,
Expand Down Expand Up @@ -693,3 +696,78 @@ class StatementSegment(BaseSegment):
Ref("DropModelStatementSegment"),
],
)


@spark3_dialect.segment(replace=True)
class JoinClauseSegment(BaseSegment):
    """A single join clause: optional join type, join keywords and join target.

    Registered with ``replace=True``, so it takes the place of the
    ``JoinClauseSegment`` already present on the Spark3 dialect. Compared with
    a plain CROSS / INNER / FULL|LEFT|RIGHT [OUTER] qualifier list, this
    grammar additionally accepts ``[LEFT] SEMI`` and ``[LEFT] ANTI``.

    The grammar matches, in order:

    1. an optional join-type qualifier,
    2. the ``JoinKeywords`` grammar (defined elsewhere on the dialect),
    3. one ``FromExpressionElementSegment`` (the joined relation),
    4. an optional ``ON`` condition or ``USING (...)`` column list.

    https://spark.apache.org/docs/3.0.0/sql-ref-syntax-qry-select-join.html

    TODO: Add NATURAL JOIN syntax.
    """

    type = "join_clause"
    match_grammar = Sequence(
        # NB These qualifiers are optional
        # TODO: Allow nested joins like:
        #   ... FROM S1.T1 t1
        #       LEFT JOIN (S2.T2 t2 JOIN S3.T3 t3 ON t2.col1 = t3.col1)
        #       ON t1.col1 = t2.col1
        OneOf(
            "CROSS",
            "INNER",
            # FULL / LEFT / RIGHT, each optionally followed by OUTER.
            Sequence(
                OneOf(
                    "FULL",
                    "LEFT",
                    "RIGHT",
                ),
                Ref.keyword("OUTER", optional=True),
            ),
            # [LEFT] SEMI
            # NOTE(review): in the accompanying join_types fixture, a bare
            # `SEMI JOIN` / `ANTI JOIN` (no LEFT) ends up with SEMI / ANTI
            # parsed as the alias of the preceding table instead of as a join
            # keyword from this branch -- presumably because they are not
            # reserved keywords in this dialect. TODO confirm and address.
            Sequence(
                Ref.keyword("LEFT", optional=True),
                "SEMI",
            ),
            # [LEFT] ANTI
            Sequence(
                Ref.keyword("LEFT", optional=True),
                "ANTI",
            ),
            optional=True,
        ),
        Ref("JoinKeywords"),
        # Everything after the join keywords sits one indent level deeper;
        # this is balanced by the final Dedent of the outer Sequence.
        Indent,
        Sequence(
            Ref("FromExpressionElementSegment"),
            # This Dedent and the matching Indent at the end of this Sequence
            # are conditional on the `indented_using_on` setting being False,
            # so together they cancel the extra indent around the ON / USING
            # part in that configuration.
            Conditional(Dedent, indented_using_on=False),
            # NB: this is optional
            OneOf(
                # ON clause
                Ref("JoinOnConditionSegment"),
                # USING clause
                Sequence(
                    "USING",
                    Indent,
                    Bracketed(
                        # NB: We don't use BracketedColumnReferenceListGrammar
                        # here because we're just using SingleIdentifierGrammar,
                        # rather than ObjectReferenceSegment or
                        # ColumnReferenceSegment.
                        # This is a) so that we don't lint it as a reference and
                        # b) because the column will probably be returned anyway
                        # during parsing.
                        Delimited(
                            Ref("SingleIdentifierGrammar"),
                            ephemeral_name="UsingClauseContents",
                        )
                    ),
                    Dedent,
                ),
                # Unqualified joins *are* allowed. They just might not
                # be a good idea.
                optional=True,
            ),
            Conditional(Indent, indented_using_on=False),
        ),
        Dedent,
    )

    # Reuse the ANSI dialect's alias-resolution method as-is instead of
    # duplicating its implementation on this replacement segment.
    get_eventual_alias = ansi_dialect.get_segment(
        "JoinClauseSegment"
    ).get_eventual_alias
4 changes: 4 additions & 0 deletions test/fixtures/dialects/spark3/join_types.sql
@@ -0,0 +1,4 @@
SELECT * FROM employee SEMI JOIN department ON employee.deptno = department.deptno;
SELECT * FROM employee ANTI JOIN department ON employee.deptno = department.deptno;
SELECT * FROM employee LEFT SEMI JOIN department ON employee.deptno = department.deptno;
SELECT * FROM employee LEFT ANTI JOIN department ON employee.deptno = department.deptno;
151 changes: 151 additions & 0 deletions test/fixtures/dialects/spark3/join_types.yml
@@ -0,0 +1,151 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 083b7cabc2d0da7873019cf780c8a8e0046fd187223d5da951159dbaf4d03ca4
file:
- base:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: employee
alias_expression:
identifier: SEMI
join_clause:
keyword: JOIN
from_expression_element:
table_expression:
table_reference:
identifier: department
join_on_condition:
keyword: 'ON'
expression:
- column_reference:
- identifier: employee
- dot: .
- identifier: deptno
- comparison_operator: '='
- column_reference:
- identifier: department
- dot: .
- identifier: deptno
- statement_terminator: ;
- base:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: employee
alias_expression:
identifier: ANTI
join_clause:
keyword: JOIN
from_expression_element:
table_expression:
table_reference:
identifier: department
join_on_condition:
keyword: 'ON'
expression:
- column_reference:
- identifier: employee
- dot: .
- identifier: deptno
- comparison_operator: '='
- column_reference:
- identifier: department
- dot: .
- identifier: deptno
- statement_terminator: ;
- base:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: employee
join_clause:
- keyword: LEFT
- keyword: SEMI
- keyword: JOIN
- from_expression_element:
table_expression:
table_reference:
identifier: department
- join_on_condition:
keyword: 'ON'
expression:
- column_reference:
- identifier: employee
- dot: .
- identifier: deptno
- comparison_operator: '='
- column_reference:
- identifier: department
- dot: .
- identifier: deptno
- statement_terminator: ;
- base:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
identifier: employee
join_clause:
- keyword: LEFT
- keyword: ANTI
- keyword: JOIN
- from_expression_element:
table_expression:
table_reference:
identifier: department
- join_on_condition:
keyword: 'ON'
expression:
- column_reference:
- identifier: employee
- dot: .
- identifier: deptno
- comparison_operator: '='
- column_reference:
- identifier: department
- dot: .
- identifier: deptno
- statement_terminator: ;

0 comments on commit 2d88f8a

Please sign in to comment.