Spark3: Support to handle CACHE AND UNCACHE auxiliary statements (#2814)

* add support for auxiliary cache and uncache statements

* fix the OPTIONS grammar to allow more than just strings, plus new test cases (examples below)
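
For reference, the statements this enables look like the following (a sketch
based on the Spark SQL docs linked from the new segments; the table names are
illustrative):

    CACHE LAZY TABLE test_cache OPTIONS ('storageLevel' = 'DISK_ONLY') AS
    SELECT a FROM test_data;

    UNCACHE TABLE IF EXISTS test_cache;

    CLEAR CACHE;

The OPTIONS fix also means property lists may mix quoted and unquoted keys
and values, e.g. OPTIONS (col1 1, 'path' '/tmp/destination').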

Co-authored-by: Barry Pollard <barry@tunetheweb.com>
R7L208 and tunetheweb committed Mar 9, 2022
1 parent d7ef8ba commit 8bcd099
Showing 9 changed files with 335 additions and 15 deletions.
77 changes: 71 additions & 6 deletions src/sqlfluff/dialects/dialect_spark3.py
@@ -292,18 +292,27 @@
        "WHL", KeywordSegment, name="whl", type="file_keyword"
    ),
    # Add relevant Hive Grammar
    BracketedPropertyListGrammar=hive_dialect.get_grammar(
        "BracketedPropertyListGrammar"
    ),
    CommentGrammar=hive_dialect.get_grammar("CommentGrammar"),
    LocationGrammar=hive_dialect.get_grammar("LocationGrammar"),
    PropertyGrammar=hive_dialect.get_grammar("PropertyGrammar"),
    SerdePropertiesGrammar=hive_dialect.get_grammar("SerdePropertiesGrammar"),
    StoredAsGrammar=hive_dialect.get_grammar("StoredAsGrammar"),
    StoredByGrammar=hive_dialect.get_grammar("StoredByGrammar"),
    StorageFormatGrammar=hive_dialect.get_grammar("StorageFormatGrammar"),
    TerminatedByGrammar=hive_dialect.get_grammar("TerminatedByGrammar"),
    # Add Spark Grammar
    PropertyGrammar=Sequence(
        OneOf(
            Ref("LiteralGrammar"),
            Ref("SingleIdentifierGrammar"),
        ),
        Ref("EqualsSegment", optional=True),
        OneOf(
            Ref("LiteralGrammar"),
            Ref("SingleIdentifierGrammar"),
        ),
    ),
    BracketedPropertyListGrammar=Bracketed(Delimited(Ref("PropertyGrammar"))),
    OptionsGrammar=Sequence("OPTIONS", Ref("BracketedPropertyListGrammar")),
    BucketSpecGrammar=Sequence(
        Ref("ClusteredBySpecGrammar"),
        Ref("SortedBySpecGrammar", optional=True),
@@ -832,7 +841,7 @@ class CreateTableStatementSegment(BaseSegment):
        Sequence("USING", Ref("DataSourceFormatGrammar"), optional=True),
        Ref("RowFormatClauseSegment", optional=True),
        Ref("StoredAsGrammar", optional=True),
        Sequence("OPTIONS", Ref("BracketedPropertyListGrammar"), optional=True),
        Ref("OptionsGrammar", optional=True),
        Ref("PartitionSpecGrammar", optional=True),
        Ref("BucketSpecGrammar", optional=True),
        AnyNumberOf(
@@ -1023,7 +1032,7 @@ class InsertOverwriteDirectorySegment(BaseSegment):
        Ref("QuotedLiteralSegment", optional=True),
        "USING",
        Ref("DataSourceFormatGrammar"),
        Sequence("OPTIONS", Ref("BracketedPropertyListGrammar"), optional=True),
        Ref("OptionsGrammar", optional=True),
        OneOf(
            AnyNumberOf(
                Ref("ValuesClauseSegment"),
@@ -1826,6 +1835,41 @@ class AnalyzeTableSegment(BaseSegment):
    )


@spark3_dialect.segment()
class CacheTableSegment(BaseSegment):
    """A `CACHE TABLE` statement.

    https://spark.apache.org/docs/latest/sql-ref-syntax-aux-cache-cache-table.html
    """

    type = "cache_table"

    match_grammar = Sequence(
        "CACHE",
        Ref.keyword("LAZY", optional=True),
        "TABLE",
        Ref("TableReferenceSegment"),
        Ref("OptionsGrammar", optional=True),
        Ref.keyword("AS", optional=True),
        Ref("SelectableGrammar"),
    )


@spark3_dialect.segment()
class ClearCacheSegment(BaseSegment):
    """A `CLEAR CACHE` statement.

    https://spark.apache.org/docs/latest/sql-ref-syntax-aux-cache-clear-cache.html
    """

    type = "clear_cache"

    match_grammar = Sequence(
        "CLEAR",
        "CACHE",
    )


@spark3_dialect.segment()
class ListFileSegment(BaseSegment):
"""A `LIST {FILE | FILES}` statement.
Expand Down Expand Up @@ -1905,6 +1949,23 @@ class RefreshFunctionStatementSegment(BaseSegment):
)


@spark3_dialect.segment()
class UncacheTableSegment(BaseSegment):
    """An `UNCACHE TABLE` statement.

    https://spark.apache.org/docs/latest/sql-ref-syntax-aux-cache-uncache-table.html
    """

    type = "uncache_table"

    match_grammar = Sequence(
        "UNCACHE",
        "TABLE",
        Ref("IfExistsGrammar", optional=True),
        Ref("TableReferenceSegment"),
    )


@spark3_dialect.segment(replace=True)
class StatementSegment(BaseSegment):
"""Overriding StatementSegment to allow for additional segment parsing."""
Expand All @@ -1927,11 +1988,14 @@ class StatementSegment(BaseSegment):
Ref("AddFileSegment"),
Ref("AddJarSegment"),
Ref("AnalyzeTableSegment"),
Ref("CacheTableSegment"),
Ref("ClearCacheSegment"),
Ref("ListFileSegment"),
Ref("ListJarSegment"),
Ref("RefreshStatementSegment"),
Ref("RefreshTableStatementSegment"),
Ref("RefreshFunctionStatementSegment"),
Ref("UncacheTableSegment"),
# Data Manipulation Statements
Ref("InsertOverwriteDirectorySegment"),
Ref("InsertOverwriteDirectoryHiveFmtSegment"),
@@ -2126,6 +2190,7 @@ class TableExpressionSegment(BaseSegment):
"""

type = "table_expression"

match_grammar = OneOf(
Ref("ValuesClauseSegment"),
Ref("BareFunctionSegment"),
18 changes: 18 additions & 0 deletions test/fixtures/dialects/spark3/cache_table.sql
@@ -0,0 +1,18 @@
CACHE TABLE TESTCACHE
OPTIONS ('storageLevel' 'DISK_ONLY')
SELECT
A,
B
FROM TESTDATA;

CACHE LAZY TABLE TESTCACHE
OPTIONS ('storageLevel' 'DISK_ONLY')
SELECT A FROM TESTDATA;

CACHE TABLE TESTCACHE
OPTIONS ('storageLevel' 'DISK_ONLY') AS
SELECT A FROM TESTDATA;

CACHE TABLE TESTCACHE
OPTIONS ('storageLevel' = 'DISK_ONLY') AS
SELECT A FROM TESTDATA;
120 changes: 120 additions & 0 deletions test/fixtures/dialects/spark3/cache_table.yml
@@ -0,0 +1,120 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 9893a5321046754c08de6b201414c0d15e03c66e2cfca6e0e0de7a4b46403e32
file:
- statement:
    cache_table:
    - keyword: CACHE
    - keyword: TABLE
    - table_reference:
        identifier: TESTCACHE
    - keyword: OPTIONS
    - bracketed:
      - start_bracket: (
      - literal: "'storageLevel'"
      - literal: "'DISK_ONLY'"
      - end_bracket: )
    - select_statement:
        select_clause:
        - keyword: SELECT
        - select_clause_element:
            column_reference:
              identifier: A
        - comma: ','
        - select_clause_element:
            column_reference:
              identifier: B
        from_clause:
          keyword: FROM
          from_expression:
            from_expression_element:
              table_expression:
                table_reference:
                  identifier: TESTDATA
- statement_terminator: ;
- statement:
    cache_table:
    - keyword: CACHE
    - keyword: LAZY
    - keyword: TABLE
    - table_reference:
        identifier: TESTCACHE
    - keyword: OPTIONS
    - bracketed:
      - start_bracket: (
      - literal: "'storageLevel'"
      - literal: "'DISK_ONLY'"
      - end_bracket: )
    - select_statement:
        select_clause:
          keyword: SELECT
          select_clause_element:
            column_reference:
              identifier: A
        from_clause:
          keyword: FROM
          from_expression:
            from_expression_element:
              table_expression:
                table_reference:
                  identifier: TESTDATA
- statement_terminator: ;
- statement:
    cache_table:
    - keyword: CACHE
    - keyword: TABLE
    - table_reference:
        identifier: TESTCACHE
    - keyword: OPTIONS
    - bracketed:
      - start_bracket: (
      - literal: "'storageLevel'"
      - literal: "'DISK_ONLY'"
      - end_bracket: )
    - keyword: AS
    - select_statement:
        select_clause:
          keyword: SELECT
          select_clause_element:
            column_reference:
              identifier: A
        from_clause:
          keyword: FROM
          from_expression:
            from_expression_element:
              table_expression:
                table_reference:
                  identifier: TESTDATA
- statement_terminator: ;
- statement:
    cache_table:
    - keyword: CACHE
    - keyword: TABLE
    - table_reference:
        identifier: TESTCACHE
    - keyword: OPTIONS
    - bracketed:
      - start_bracket: (
      - literal: "'storageLevel'"
      - comparison_operator:
          raw_comparison_operator: '='
      - literal: "'DISK_ONLY'"
      - end_bracket: )
    - keyword: AS
    - select_statement:
        select_clause:
          keyword: SELECT
          select_clause_element:
            column_reference:
              identifier: A
        from_clause:
          keyword: FROM
          from_expression:
            from_expression_element:
              table_expression:
                table_reference:
                  identifier: TESTDATA
- statement_terminator: ;
1 change: 1 addition & 0 deletions test/fixtures/dialects/spark3/clear_cache.sql
@@ -0,0 +1 @@
CLEAR CACHE;
12 changes: 12 additions & 0 deletions test/fixtures/dialects/spark3/clear_cache.yml
@@ -0,0 +1,12 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 88187c4784eba3d8ed01791d6bb354c7e716f93c2526abf5e4b76d77e47499c9
file:
  statement:
    clear_cache:
    - keyword: CLEAR
    - keyword: CACHE
  statement_terminator: ;
14 changes: 12 additions & 2 deletions test/fixtures/dialects/spark3/insert_overwrite_directory.sql
@@ -1,11 +1,21 @@
INSERT OVERWRITE DIRECTORY '/tmp/destination'
USING PARQUET
OPTIONS ("col1" = "1", "col2" = "2", "col3" = 'test')
SELECT * FROM test_table;
SELECT a FROM test_table;

INSERT OVERWRITE DIRECTORY
USING PARQUET
OPTIONS (
'path' = '/tmp/destination', "col1" = "1", "col2" = "2", "col3" = 'test'
)
SELECT * FROM test_table;
SELECT a FROM test_table;

INSERT OVERWRITE DIRECTORY
USING PARQUET
OPTIONS ('path' '/tmp/destination', col1 1, col2 2, col3 'test')
SELECT a FROM test_table;

INSERT OVERWRITE DIRECTORY '/tmp/destination'
USING PARQUET
OPTIONS (col1 1, col2 2, col3 'test')
SELECT a FROM test_table;
