Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SparkSQL: Support for CONVERT TO DELTA command #3482

Merged
merged 2 commits into from Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 33 additions & 6 deletions src/sqlfluff/dialects/dialect_sparksql.py
Expand Up @@ -380,6 +380,7 @@
# Similar to DataSourcesV2
"DELTA", # https://github.com/delta-io/delta
"CSV",
"ICEBERG",
"TEXT",
"BINARYFILE",
),
Expand Down Expand Up @@ -423,14 +424,20 @@
StartHintSegment=StringParser("/*+", KeywordSegment, name="start_hint"),
EndHintSegment=StringParser("*/", KeywordSegment, name="end_hint"),
PartitionSpecGrammar=Sequence(
OneOf("PARTITION", Sequence("PARTITIONED", "BY")),
OneOf(
"PARTITION",
Sequence("PARTITIONED", "BY"),
),
Bracketed(
Delimited(
Sequence(
Ref("ColumnReferenceSegment"),
Ref("EqualsSegment", optional=True),
Ref("LiteralGrammar", optional=True),
Ref("CommentGrammar", optional=True),
OneOf(
Ref("ColumnDefinitionSegment"),
Sequence(
Ref("ColumnReferenceSegment"),
Ref("EqualsSegment", optional=True),
Ref("LiteralGrammar", optional=True),
Ref("CommentGrammar", optional=True),
),
),
),
),
Expand Down Expand Up @@ -2238,6 +2245,7 @@ class StatementSegment(ansi.StatementSegment):
Ref("DescribeHistoryStatementSegment"),
Ref("DescribeDetailStatementSegment"),
Ref("GenerateManifestFileStatementSegment"),
Ref("ConvertToDeltaStatementSegment"),
],
remove=[
Ref("TransactionStatementSegment"),
Expand Down Expand Up @@ -2711,3 +2719,22 @@ class GenerateManifestFileStatementSegment(BaseSegment):
Ref("TableReferenceSegment"),
),
)


class ConvertToDeltaStatementSegment(BaseSegment):
    """Parse a Delta Lake `CONVERT TO DELTA` statement.

    The grammar matches, strictly in this order::

        CONVERT TO DELTA <file_reference> [NO STATISTICS] [<partition_spec>]

    Examples of accepted input:

        CONVERT TO DELTA PARQUET.`/data/events/`;
        CONVERT TO DELTA ICEBERG.`/data/events/` NO STATISTICS;
        CONVERT TO DELTA PARQUET.`/data/events/` PARTITIONED BY (part int);

    References:
    https://docs.delta.io/latest/delta-utility.html#convert-a-parquet-table-to-a-delta-table
    https://docs.databricks.com/delta/delta-utility.html#convert-an-iceberg-table-to-a-delta-table
    """

    # Name under which this statement appears in the parse tree.
    type = "convert_to_delta_statement"

    match_grammar: Matchable = Sequence(
        # Fixed three-keyword prefix that introduces the statement.
        "CONVERT",
        "TO",
        "DELTA",
        # The source being converted, written as <format>.`<path>`.
        Ref("FileReferenceSegment"),
        # Optional "NO STATISTICS" keyword pair; it must precede any
        # partition clause because Sequence elements match in order.
        Sequence("NO", "STATISTICS", optional=True),
        # Optional partition clause, e.g. PARTITIONED BY (part int, part2 int).
        Ref("PartitionSpecGrammar", optional=True),
    )
1 change: 1 addition & 0 deletions src/sqlfluff/dialects/dialect_sparksql_keywords.py
Expand Up @@ -280,6 +280,7 @@
# Community Contributed Data Sources
"DELTA", # https://github.com/delta-io/delta
"XML", # https://github.com/databricks/spark-xml
"ICEBERG",
# Delta Lake
"DETAIL",
"DRY",
Expand Down
13 changes: 13 additions & 0 deletions test/fixtures/dialects/sparksql/delta_convert_to.sql
@@ -0,0 +1,13 @@
-- Convert unpartitioned Parquet table at path '<path-to-table>'
CONVERT TO DELTA PARQUET.`/data/events/`;

-- Convert partitioned Parquet table at path '<path-to-table>'
-- and partitioned by integer columns named 'part' and 'part2'
CONVERT TO DELTA PARQUET.`/data/events/` PARTITIONED BY (part int, part2 int);

-- Convert the Iceberg table in the path <path-to-table>.
CONVERT TO DELTA ICEBERG.`/data/events/`;

-- Convert the Iceberg table in the path <path-to-table>
-- without collecting statistics
CONVERT TO DELTA ICEBERG.`/data/events/` NO STATISTICS;
65 changes: 65 additions & 0 deletions test/fixtures/dialects/sparksql/delta_convert_to.yml
@@ -0,0 +1,65 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 2c7c05978f82e2fc0f6dbdf7d8f1ae11b70b24c3d2ca90a2288bd6d145ae5a08
file:
- statement:
convert_to_delta_statement:
- keyword: CONVERT
- keyword: TO
- keyword: DELTA
- file_reference:
keyword: PARQUET
dot: .
identifier: '`/data/events/`'
- statement_terminator: ;
- statement:
convert_to_delta_statement:
- keyword: CONVERT
- keyword: TO
- keyword: DELTA
- file_reference:
keyword: PARQUET
dot: .
identifier: '`/data/events/`'
- keyword: PARTITIONED
- keyword: BY
- bracketed:
- start_bracket: (
- column_definition:
identifier: part
data_type:
primitive_type:
keyword: int
- comma: ','
- column_definition:
identifier: part2
data_type:
primitive_type:
keyword: int
- end_bracket: )
- statement_terminator: ;
- statement:
convert_to_delta_statement:
- keyword: CONVERT
- keyword: TO
- keyword: DELTA
- file_reference:
keyword: ICEBERG
dot: .
identifier: '`/data/events/`'
- statement_terminator: ;
- statement:
convert_to_delta_statement:
- keyword: CONVERT
- keyword: TO
- keyword: DELTA
- file_reference:
keyword: ICEBERG
dot: .
identifier: '`/data/events/`'
- keyword: 'NO'
- keyword: STATISTICS
- statement_terminator: ;