Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spark3: Support for LATERAL VIEW clause #2687

Merged
merged 19 commits into from Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sqlfluff/core/parser/grammar/anyof.py
Expand Up @@ -2,18 +2,18 @@

from typing import List, Optional, Tuple

from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.context import ParseContext
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral
from sqlfluff.core.parser.grammar.base import (
BaseGrammar,
MatchableType,
cached_method_for_parse_context,
)
from sqlfluff.core.parser.grammar.sequence import Sequence, Bracketed
from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral


class AnyNumberOf(BaseGrammar):
Expand Down
68 changes: 65 additions & 3 deletions src/sqlfluff/dialects/dialect_spark3.py
Expand Up @@ -181,7 +181,7 @@
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
# TODO Add PIVOT, LATERAL VIEW, and DISTRIBUTE BY clauses
# TODO Add PIVOT, and DISTRIBUTE BY clauses
"HAVING",
"WINDOW",
Ref("SetOperatorSegment"),
Expand Down Expand Up @@ -1281,7 +1281,7 @@ class UnorderedSelectStatementSegment(BaseSegment):
parse_grammar = ansi_dialect.get_segment(
"UnorderedSelectStatementSegment"
).parse_grammar.copy(
# TODO Insert: PIVOT and LATERAL VIEW clauses
# TODO Insert: PIVOT clause
# Removing non-valid clauses that exist in ANSI dialect
remove=[Ref("OverlapsClauseSegment", optional=True)]
)
Expand Down Expand Up @@ -1515,6 +1515,36 @@ class SamplingExpressionSegment(BaseSegment):
)


@spark3_dialect.segment()
class LateralViewClauseSegment(BaseSegment):
    """A `LATERAL VIEW` clause, as attached to a `FROM` expression.

    Shape matched by the grammar below: the keywords `LATERAL VIEW`, an
    optional `OUTER` keyword, a function call, an optional bare table
    alias, and finally `AS` followed by one or more column aliases.

    https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html
    """

    type = "lateral_view_clause"

    match_grammar = Sequence(
        Indent,
        "LATERAL",
        "VIEW",
        Ref.keyword("OUTER", optional=True),
        Ref("FunctionSegment"),
        # NB: AliasExpressionSegment is deliberately not reused for the
        # table alias or the column aliases, because it treats `AS` as
        # optional. Here the rules are stricter: the table alias is a
        # bare identifier (no `AS` allowed), while the column aliases
        # must be introduced by `AS`.
        Ref("SingleIdentifierGrammar", optional=True),
        Sequence("AS", Delimited(Ref("SingleIdentifierGrammar"))),
        Dedent,
    )


# Auxiliary Statements
@spark3_dialect.segment()
class AddExecutablePackage(BaseSegment):
Expand Down Expand Up @@ -1726,7 +1756,10 @@ class AliasExpressionSegment(BaseSegment):
),
# just a table alias
Ref("SingleIdentifierGrammar"),
exclude=Ref("JoinTypeKeywords"),
exclude=OneOf(
"LATERAL",
Ref("JoinTypeKeywords"),
),
),
)

Expand Down Expand Up @@ -1844,3 +1877,32 @@ class FileReferenceSegment(BaseSegment):
# to match as a `TableReferenceSegment`
Ref("QuotedIdentifierSegment"),
)


@spark3_dialect.segment(replace=True)
class FromExpressionElementSegment(BaseSegment):
    """A table expression.

    Enhanced from ANSI to allow any number of `LATERAL VIEW` clauses after
    the table expression and its optional alias and/or sampling expression,
    e.g. `FROM person p LATERAL VIEW EXPLODE(...) t AS c`.
    """

    type = "from_expression_element"
    match_grammar = Sequence(
        Ref("PreTableFunctionKeywordsGrammar", optional=True),
        OptionallyBracketed(Ref("TableExpressionSegment")),
        OneOf(
            Sequence(
                Ref("AliasExpressionSegment"),
                Ref("SamplingExpressionSegment"),
            ),
            Ref("SamplingExpressionSegment"),
            Ref("AliasExpressionSegment"),
            optional=True,
        ),
        # NB: `LateralViewClauseSegment` must come after the alias/sampling
        # alternatives. The table alias belongs to the relation and is
        # written before any `LATERAL VIEW`; matching lateral views first
        # would leave an aliased table's lateral views unparsable.
        AnyNumberOf(Ref("LateralViewClauseSegment")),
        Ref("PostTableExpressionGrammar", optional=True),
    )

    # Alias resolution is reused as-is from the ANSI segment of the same name.
    get_eventual_alias = ansi_dialect.get_segment(
        "FromExpressionElementSegment"
    ).get_eventual_alias
16 changes: 8 additions & 8 deletions src/sqlfluff/rules/L026.py
Expand Up @@ -3,19 +3,19 @@
from typing import cast, List, Optional, Tuple

from sqlfluff.core.dialects.base import Dialect
from sqlfluff.core.dialects.common import AliasInfo
from sqlfluff.core.rules.analysis.select_crawler import (
Query as SelectCrawlerQuery,
SelectCrawler,
)
from sqlfluff.core.dialects.common import AliasInfo
from sqlfluff.core.rules.base import (
BaseRule,
LintResult,
RuleContext,
EvalResultType,
)
from sqlfluff.core.rules.functional import sp
from sqlfluff.core.rules.doc_decorators import document_configuration
from sqlfluff.core.rules.functional import sp
from sqlfluff.core.rules.reference import object_ref_matches_table


Expand All @@ -31,9 +31,9 @@ class Rule_L026(BaseRule):
"""References cannot reference objects not present in ``FROM`` clause.

.. note::
This rule is disabled by default for BigQuery due to its use of
structs which trigger false positives. It can be enabled with the
``force_enable = True`` flag.
This rule is disabled by default for BigQuery, Hive, Redshift, and Spark3
due to the use of structs and lateral views which trigger false positives.
It can be enabled with the ``force_enable = True`` flag.

**Anti-pattern**

Expand Down Expand Up @@ -64,7 +64,7 @@ def _eval(self, context: RuleContext) -> EvalResultType:
self.force_enable: bool

if (
context.dialect.name in ["bigquery", "hive", "redshift"]
context.dialect.name in ["bigquery", "hive", "redshift", "spark3"]
and not self.force_enable
):
return LintResult()
Expand Down Expand Up @@ -203,6 +203,6 @@ def _resolve_reference(
# Return the first segment rather than the string
anchor=tbl_refs[0][0].segments[0],
description=f"Reference {r.raw!r} refers to table/view "
"not found in the FROM clause or found in ancestor "
"statement.",
"not found in the FROM clause or found in ancestor "
"statement.",
)
48 changes: 48 additions & 0 deletions test/fixtures/dialects/spark3/select_from_lateral_view.sql
@@ -0,0 +1,48 @@
-- Two chained LATERAL VIEW clauses: the first with a table alias
-- (tbl_name), the second with only a column alias.
SELECT
id,
name,
age,
class,
address,
c_age,
d_age
FROM person
LATERAL VIEW EXPLODE(ARRAY(30, 60)) tbl_name AS c_age
LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age;

-- LATERAL VIEW clauses without table aliases, followed by GROUP BY.
SELECT
c_age,
COUNT(*) AS record_count
FROM person
LATERAL VIEW EXPLODE(ARRAY(30, 60)) AS c_age
LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age
GROUP BY c_age;

-- LATERAL VIEW over an empty ARRAY() call, with a table alias.
SELECT
id,
name,
age,
class,
address,
c_age,
d_age
FROM person
LATERAL VIEW EXPLODE(ARRAY()) tbl_name AS c_age;

-- Same as above but using the optional OUTER keyword.
SELECT
id,
name,
age,
class,
address,
c_age
FROM person
LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age;

-- INLINE generator with a table alias and multiple comma-separated
-- column aliases; the final statement has no trailing semicolon.
SELECT
person.id,
exploded_people.name,
exploded_people.age,
exploded_people.state
FROM person
LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state