Skip to content

Commit

Permalink
Spark3: Support for LATERAL VIEW clause (#2687)
Browse files Browse the repository at this point in the history
* allow named argument to accept an iterable

* updated to allow for LATERAL VIEW

* black

* use OneOf instead of list for exclude

* use get_segment to define get_eventual_alias

* remove unused imports

* PR feedback

* black

* Update src/sqlfluff/dialects/dialect_spark3.py

Co-authored-by: Barry Pollard <barry_pollard@hotmail.com>

* remove AnyNumberOf AliasExpressionSegments

* black

* remove two invalid test cases and update LateralViewClauseSegment

* refresh yml

* updates to LateralViewClauseSegment

* exclude spark3 from L026 by default

* black

Co-authored-by: Barry Pollard <barry_pollard@hotmail.com>
  • Loading branch information
R7L208 and tunetheweb committed Feb 18, 2022
1 parent 0307c76 commit ef27896
Show file tree
Hide file tree
Showing 5 changed files with 469 additions and 14 deletions.
10 changes: 5 additions & 5 deletions src/sqlfluff/core/parser/grammar/anyof.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@

from typing import List, Optional, Tuple

from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.context import ParseContext
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral
from sqlfluff.core.parser.grammar.base import (
BaseGrammar,
MatchableType,
cached_method_for_parse_context,
)
from sqlfluff.core.parser.grammar.sequence import Sequence, Bracketed
from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral


class AnyNumberOf(BaseGrammar):
Expand Down
68 changes: 65 additions & 3 deletions src/sqlfluff/dialects/dialect_spark3.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
# TODO Add PIVOT, LATERAL VIEW, and DISTRIBUTE BY clauses
# TODO Add PIVOT, and DISTRIBUTE BY clauses
"HAVING",
"WINDOW",
Ref("SetOperatorSegment"),
Expand Down Expand Up @@ -1281,7 +1281,7 @@ class UnorderedSelectStatementSegment(BaseSegment):
parse_grammar = ansi_dialect.get_segment(
"UnorderedSelectStatementSegment"
).parse_grammar.copy(
# TODO Insert: PIVOT and LATERAL VIEW clauses
# TODO Insert: PIVOT clause
# Removing non-valid clauses that exist in ANSI dialect
remove=[Ref("OverlapsClauseSegment", optional=True)]
)
Expand Down Expand Up @@ -1515,6 +1515,36 @@ class SamplingExpressionSegment(BaseSegment):
)


@spark3_dialect.segment()
class LateralViewClauseSegment(BaseSegment):
    """A `LATERAL VIEW` clause, as it appears within a `FROM` clause.

    The grammar below matches, in order: the `LATERAL VIEW` keywords, an
    optional `OUTER` keyword, a function call, an optional bare identifier
    (the table alias), and a mandatory `AS` followed by a delimited list of
    identifiers (the column aliases). For example:

        LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age
        LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state

    https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html
    """

    type = "lateral_view_clause"

    match_grammar = Sequence(
        Indent,
        "LATERAL",
        "VIEW",
        Ref.keyword("OUTER", optional=True),
        Ref("FunctionSegment"),
        # NB: AliasExpressionSegment is deliberately not used for either the
        # table alias or the column aliases, because `AS` is optional within
        # it. Here the two cases are spelled out explicitly instead:
        #   - the table alias is a bare identifier, with no `AS` keyword;
        #   - the column aliases must be introduced by `AS`.
        Ref("SingleIdentifierGrammar", optional=True),
        Sequence(
            "AS",
            Delimited(
                Ref("SingleIdentifierGrammar"),
            ),
        ),
        Dedent,
    )


# Auxiliary Statements
@spark3_dialect.segment()
class AddExecutablePackage(BaseSegment):
Expand Down Expand Up @@ -1726,7 +1756,10 @@ class AliasExpressionSegment(BaseSegment):
),
# just a table alias
Ref("SingleIdentifierGrammar"),
exclude=Ref("JoinTypeKeywords"),
exclude=OneOf(
"LATERAL",
Ref("JoinTypeKeywords"),
),
),
)

Expand Down Expand Up @@ -1844,3 +1877,32 @@ class FileReferenceSegment(BaseSegment):
# to match as a `TableReferenceSegment`
Ref("QuotedIdentifierSegment"),
)


@spark3_dialect.segment(replace=True)
class FromExpressionElementSegment(BaseSegment):
    """A table expression.

    Enhanced from ANSI to allow for the `LATERAL VIEW` clause: zero or more
    `LateralViewClauseSegment`s may follow the table expression, before the
    optional alias and/or sampling expression.
    """

    type = "from_expression_element"
    match_grammar = Sequence(
        Ref("PreTableFunctionKeywordsGrammar", optional=True),
        OptionallyBracketed(Ref("TableExpressionSegment")),
        # NOTE(review): lateral views are matched *before* the optional
        # alias below. Whether a table alias written ahead of the clause
        # (e.g. `FROM person p LATERAL VIEW ...`) still parses is not
        # evident from this block - TODO confirm.
        AnyNumberOf(Ref("LateralViewClauseSegment")),
        # Optionally: an alias followed by a sampling expression, a sampling
        # expression on its own, or an alias on its own.
        OneOf(
            Sequence(
                Ref("AliasExpressionSegment"),
                Ref("SamplingExpressionSegment"),
            ),
            Ref("SamplingExpressionSegment"),
            Ref("AliasExpressionSegment"),
            optional=True,
        ),
        Ref("PostTableExpressionGrammar", optional=True),
    )

    # Reuse the ANSI segment's implementation rather than redefining it here.
    get_eventual_alias = ansi_dialect.get_segment(
        "FromExpressionElementSegment"
    ).get_eventual_alias
12 changes: 6 additions & 6 deletions src/sqlfluff/rules/L026.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
from typing import cast, List, Optional, Tuple

from sqlfluff.core.dialects.base import Dialect
from sqlfluff.core.dialects.common import AliasInfo
from sqlfluff.core.rules.analysis.select_crawler import (
Query as SelectCrawlerQuery,
SelectCrawler,
)
from sqlfluff.core.dialects.common import AliasInfo
from sqlfluff.core.rules.base import (
BaseRule,
LintResult,
RuleContext,
EvalResultType,
)
from sqlfluff.core.rules.functional import sp
from sqlfluff.core.rules.doc_decorators import document_configuration
from sqlfluff.core.rules.functional import sp
from sqlfluff.core.rules.reference import object_ref_matches_table


Expand All @@ -31,9 +31,9 @@ class Rule_L026(BaseRule):
"""References cannot reference objects not present in ``FROM`` clause.
.. note::
This rule is disabled by default for BigQuery due to its use of
structs which trigger false positives. It can be enabled with the
``force_enable = True`` flag.
This rule is disabled by default for BigQuery, Hive, Redshift, and Spark3
due to the use of structs and lateral views which trigger false positives.
It can be enabled with the ``force_enable = True`` flag.
**Anti-pattern**
Expand Down Expand Up @@ -64,7 +64,7 @@ def _eval(self, context: RuleContext) -> EvalResultType:
self.force_enable: bool

if (
context.dialect.name in ["bigquery", "hive", "redshift"]
context.dialect.name in ["bigquery", "hive", "redshift", "spark3"]
and not self.force_enable
):
return LintResult()
Expand Down
48 changes: 48 additions & 0 deletions test/fixtures/dialects/spark3/select_from_lateral_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
SELECT
id,
name,
age,
class,
address,
c_age,
d_age
FROM person
LATERAL VIEW EXPLODE(ARRAY(30, 60)) tbl_name AS c_age
LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age;

SELECT
c_age,
COUNT(*) AS record_count
FROM person
LATERAL VIEW EXPLODE(ARRAY(30, 60)) AS c_age
LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age
GROUP BY c_age;

SELECT
id,
name,
age,
class,
address,
c_age,
d_age
FROM person
LATERAL VIEW EXPLODE(ARRAY()) tbl_name AS c_age;

SELECT
id,
name,
age,
class,
address,
c_age
FROM person
LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age;

SELECT
person.id,
exploded_people.name,
exploded_people.age,
exploded_people.state
FROM person
LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state

0 comments on commit ef27896

Please sign in to comment.