-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix(bigquery): support LOAD DATA FROM FILES syntax #7482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -706,6 +706,27 @@ def _parse_export_data(self) -> exp.Export: | |
| ) | ||
| ) | ||
|
|
||
| def _parse_load(self) -> exp.LoadData | exp.Command: | ||
| index = self._index | ||
|
|
||
| if self._match_text_seq("DATA"): | ||
| overwrite = self._match(TokenType.OVERWRITE) | ||
| if not overwrite: | ||
| self._match_pair(TokenType.INTO, TokenType.TABLE) | ||
|
|
||
| this = self._parse_table(schema=True) | ||
| if this and self._match_text_seq("FROM", "FILES"): | ||
| return self.expression( | ||
| exp.LoadData( | ||
| this=this, | ||
| overwrite=overwrite, | ||
| files=exp.Properties(expressions=self._parse_wrapped_properties()), | ||
| ) | ||
| ) | ||
|
|
||
| self._retreat(index) | ||
| return super()._parse_load() | ||
|
Comment on lines
+709
to
+728
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like there's overlapping logic with the base parser function, the main difference being matching `FROM FILES`. We can instead just inline this into the base parser, e.g.:
# parser.py
def _parse_load(self) -> exp.LoadData | exp.Command:
    if self._match_text_seq("DATA"):
        ...
        return self.expression(
            exp.LoadData(
                ...,
                files = self._match_text_seq("FROM", "FILES") and exp.Properties(expressions=self._parse_wrapped_properties())
            )
        )
    return self._parse_as_command(self._prev) |
||
|
|
||
| def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: | ||
| func_index = self._index + 1 | ||
| this = super()._parse_column_ops(this) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -206,6 +206,14 @@ def test_bigquery(self): | |||||||
| self.validate_identity("BEGIN TRANSACTION") | ||||||||
| self.validate_identity("COMMIT TRANSACTION") | ||||||||
| self.validate_identity("ROLLBACK TRANSACTION") | ||||||||
| self.validate_identity( | ||||||||
| "LOAD DATA OVERWRITE mydataset.table1 FROM FILES(format='AVRO', uris=['gs://bucket/path/file.avro'])", | ||||||||
| "LOAD DATA OVERWRITE mydataset.table1 FROM FILES(FORMAT='AVRO', uris=['gs://bucket/path/file.avro'])", | ||||||||
|
Comment on lines
+210
to
+211
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can replace the second line with the first and leave
Suggested change
|
||||||||
| ).assert_is(exp.LoadData) | ||||||||
| self.validate_identity( | ||||||||
| "LOAD DATA INTO TABLE mydataset.table1 FROM FILES(format='AVRO', uris=['gs://bucket/path/file.avro'])", | ||||||||
| "LOAD DATA INTO TABLE mydataset.table1 FROM FILES(FORMAT='AVRO', uris=['gs://bucket/path/file.avro'])", | ||||||||
|
Comment on lines
+214
to
+215
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto; you can inline both of these tests into one if you use `subTest`, e.g.:
def test_load_data(self):
    with self.subTest(f"Testing LOAD DATA FROM FILES"):
        for overwrite in ("", "OVERWRITE "):
            self.validate_identity(f"LOAD DATA {overwrite} ...").assert_is(exp.LoadData)
|
||||||||
| ) | ||||||||
| self.validate_identity("CAST(x AS BIGNUMERIC)") | ||||||||
| self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1") | ||||||||
| self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t") | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto, can we use the base generator and render `files` there, to avoid duplicating logic?