diff --git a/sqlglot/expressions/dml.py b/sqlglot/expressions/dml.py index 1aa31ee732..ed50983220 100644 --- a/sqlglot/expressions/dml.py +++ b/sqlglot/expressions/dml.py @@ -282,7 +282,8 @@ class LoadData(Expression): "this": True, "local": False, "overwrite": False, - "inpath": True, + "inpath": False, + "files": False, "partition": False, "input_format": False, "serde": False, diff --git a/sqlglot/generator.py b/sqlglot/generator.py index 9ba486bdf4..d9f08c480a 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -2811,10 +2811,20 @@ def escape_str( return self._replace_line_breaks(text).replace(delimiter, escaped_delimiter) def loaddata_sql(self, expression: exp.LoadData) -> str: + is_overwrite = expression.args.get("overwrite") + overwrite = " OVERWRITE" if is_overwrite else "" + this = self.sql(expression, "this") + + files = expression.args.get("files") + if files: + files_sql = self.expressions(files, flat=True) + files_sql = f"FILES{self.wrap(files_sql)}" + this = f" {this}" if is_overwrite else f" INTO TABLE {this}" + return f"LOAD DATA{overwrite}{this} FROM {files_sql}" + local = " LOCAL" if expression.args.get("local") else "" inpath = f" INPATH {self.sql(expression, 'inpath')}" - overwrite = " OVERWRITE" if expression.args.get("overwrite") else "" - this = f" INTO TABLE {self.sql(expression, 'this')}" + this = f" INTO TABLE {this}" partition = self.sql(expression, "partition") partition = f" {partition}" if partition else "" input_format = self.sql(expression, "input_format") diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 586b285531..7bb9c36b68 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -3558,6 +3558,8 @@ def _parse_load(self) -> exp.LoadData | exp.Command: local=local, overwrite=overwrite, inpath=inpath, + files=self._match_text_seq("FROM", "FILES") + and exp.Properties(expressions=self._parse_wrapped_properties()), partition=self._parse_partition(), input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), serde=self._match_text_seq("SERDE") and self._parse_string(), diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py index 03f55e135c..dfe7f243c0 100644 --- a/tests/dialects/test_bigquery.py +++ b/tests/dialects/test_bigquery.py @@ -206,6 +206,12 @@ def test_bigquery(self): self.validate_identity("BEGIN TRANSACTION") self.validate_identity("COMMIT TRANSACTION") self.validate_identity("ROLLBACK TRANSACTION") + for load_data_sql in ( + "LOAD DATA OVERWRITE mydataset.table1 FROM FILES(FORMAT='AVRO', uris=['gs://bucket/path/file.avro'])", + "LOAD DATA INTO TABLE mydataset.table1 FROM FILES(FORMAT='AVRO', uris=['gs://bucket/path/file.avro'])", + ): + with self.subTest(load_data_sql=load_data_sql): + self.validate_identity(load_data_sql).assert_is(exp.LoadData) self.validate_identity("CAST(x AS BIGNUMERIC)") self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1") self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t")