Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spark3: Support for LATERAL VIEW clause #2687

Merged
merged 19 commits into from Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sqlfluff/core/parser/grammar/anyof.py
Expand Up @@ -2,18 +2,18 @@

from typing import List, Optional, Tuple

from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.context import ParseContext
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral
from sqlfluff.core.parser.grammar.base import (
BaseGrammar,
MatchableType,
cached_method_for_parse_context,
)
from sqlfluff.core.parser.grammar.sequence import Sequence, Bracketed
from sqlfluff.core.parser.helpers import trim_non_code_segments
from sqlfluff.core.parser.match_logging import parse_match_logging
from sqlfluff.core.parser.match_result import MatchResult
from sqlfluff.core.parser.match_wrapper import match_wrapper
from sqlfluff.core.parser.segments import BaseSegment, allow_ephemeral


class AnyNumberOf(BaseGrammar):
Expand Down
67 changes: 64 additions & 3 deletions src/sqlfluff/dialects/dialect_spark3.py
Expand Up @@ -181,7 +181,7 @@
Sequence("CLUSTER", "BY"),
Sequence("DISTRIBUTE", "BY"),
Sequence("SORT", "BY"),
# TODO Add PIVOT, LATERAL VIEW, and DISTRIBUTE BY clauses
# TODO Add PIVOT and DISTRIBUTE BY clauses
"HAVING",
"WINDOW",
Ref("SetOperatorSegment"),
Expand Down Expand Up @@ -1281,7 +1281,7 @@ class UnorderedSelectStatementSegment(BaseSegment):
parse_grammar = ansi_dialect.get_segment(
"UnorderedSelectStatementSegment"
).parse_grammar.copy(
# TODO Insert: PIVOT and LATERAL VIEW clauses
# TODO Insert: PIVOT clause
# Removing non-valid clauses that exist in ANSI dialect
remove=[Ref("OverlapsClauseSegment", optional=True)]
)
Expand Down Expand Up @@ -1515,6 +1515,35 @@ class SamplingExpressionSegment(BaseSegment):
)


@spark3_dialect.segment()
class LateralViewClauseSegment(BaseSegment):
    """A `LATERAL VIEW` like in a `FROM` clause.

    Matches e.g. `LATERAL VIEW [OUTER] EXPLODE(col) tbl AS a, b`.

    https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html
    """

    type = "lateral_view_clause"

    match_grammar = Sequence(
        Indent,
        "LATERAL",
        "VIEW",
        # OUTER makes the generator emit NULL rows instead of
        # dropping the input row when it produces no output.
        Ref.keyword("OUTER", optional=True),
        Ref("FunctionSegment"),
        # NB: AliasExpressionSegment is not used here for the table
        # or column alias because `AS` is optional within it
        # (and in most scenarios). Here it's explicitly defined
        # for the cases where it is required or disallowed.
        Ref("SingleIdentifierGrammar", optional=True),
        Sequence(
            "AS",
            # Generator functions (e.g. EXPLODE on a map) may yield
            # several columns, so allow a comma-delimited alias list:
            # `... AS key, value`.
            Delimited(
                Ref("SingleIdentifierGrammar"),
            ),
            optional=True,
        ),
        Dedent,
    )


# Auxiliary Statements
@spark3_dialect.segment()
class AddExecutablePackage(BaseSegment):
Expand Down Expand Up @@ -1726,7 +1755,10 @@ class AliasExpressionSegment(BaseSegment):
),
# just a table alias
Ref("SingleIdentifierGrammar"),
exclude=Ref("JoinTypeKeywords"),
exclude=OneOf(
"LATERAL",
Ref("JoinTypeKeywords"),
),
),
)

Expand Down Expand Up @@ -1844,3 +1876,32 @@ class FileReferenceSegment(BaseSegment):
# to match as a `TableReferenceSegment`
Ref("QuotedIdentifierSegment"),
)


@spark3_dialect.segment(replace=True)
class FromExpressionElementSegment(BaseSegment):
    """A table expression.

    Enhanced from ANSI to allow for `LATERAL VIEW` clause
    """

    type = "from_expression_element"
    match_grammar = Sequence(
        Ref("PreTableFunctionKeywordsGrammar", optional=True),
        OptionallyBracketed(Ref("TableExpressionSegment")),
        # Zero or more LATERAL VIEW clauses may follow the table
        # expression, before any alias or sampling clause.
        AnyNumberOf(Ref("LateralViewClauseSegment")),
        # Alias and sampling: either both (alias first), or one,
        # or neither.
        OneOf(
            Sequence(
                Ref("AliasExpressionSegment"),
                Ref("SamplingExpressionSegment"),
            ),
            Ref("SamplingExpressionSegment"),
            Ref("AliasExpressionSegment"),
            optional=True,
        ),
        Ref("PostTableExpressionGrammar", optional=True),
    )

    # Reuse the ANSI implementation unchanged so alias resolution
    # (used by lint rules) stays consistent with the base dialect.
    get_eventual_alias = ansi_dialect.get_segment(
        "FromExpressionElementSegment"
    ).get_eventual_alias
40 changes: 40 additions & 0 deletions test/fixtures/dialects/spark3/select_from_lateral_view.sql
@@ -0,0 +1,40 @@
-- Fixture: LATERAL VIEW variants — with/without a table alias,
-- with GROUP BY, with an empty array, and with the OUTER keyword.
SELECT
    id,
    name,
    age,
    class,
    address,
    c_age,
    d_age
FROM person
    LATERAL VIEW EXPLODE(ARRAY(30, 60)) tbl_name AS c_age
    LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age;

SELECT
    c_age,
    COUNT(*) AS record_count
FROM person
    LATERAL VIEW EXPLODE(ARRAY(30, 60)) AS c_age
    LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age
GROUP BY c_age;

SELECT
    id,
    name,
    age,
    class,
    address,
    c_age,
    d_age
FROM person
    LATERAL VIEW EXPLODE(ARRAY()) tbl_name AS c_age;

SELECT
    id,
    name,
    age,
    class,
    address,
    c_age
FROM person
    LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age;