diff --git a/Cargo.toml b/Cargo.toml
index a7b2727bd882..4007106ea5f1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -137,7 +137,7 @@ rand = "0.8"
 regex = "1.8"
 rstest = "0.22.0"
 serde_json = "1"
-sqlparser = { version = "0.50.0", features = ["visitor"] }
+sqlparser = { git = "https://github.com/tarantool/datafusion-sqlparser-rs.git", features = ["visitor"], branch = "release-0.50.0" }
 tempfile = "3"
 thiserror = "1.0.44"
 tokio = { version = "1.36", features = ["macros", "rt", "sync"] }
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 744b56d5521f..97e719111a5b 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -279,12 +279,17 @@ impl<'a> DFParser<'a> {
         sql: &str,
         dialect: &'a dyn Dialect,
     ) -> Result<Self, ParserError> {
-        let mut tokenizer = Tokenizer::new(dialect, sql);
-        let tokens = tokenizer.tokenize()?;
+        let tokens = Tokenizer::new(dialect, sql).into_tokens().collect::<Result<_, _>>()?;
+        Ok(Self::from_dialect_and_tokens(dialect, tokens))
+    }
 
-        Ok(DFParser {
-            parser: Parser::new(dialect).with_tokens(tokens),
-        })
+    /// Create a new parser from the specified dialect and tokens.
+    pub fn from_dialect_and_tokens(
+        dialect: &'a dyn Dialect,
+        tokens: Vec<Token>,
+    ) -> Self {
+        let parser = Parser::new(dialect).with_tokens(tokens);
+        DFParser { parser }
     }
 
     /// Parse a sql string into one or [`Statement`]s using the
@@ -300,7 +305,18 @@ impl<'a> DFParser<'a> {
         sql: &str,
         dialect: &dyn Dialect,
     ) -> Result<VecDeque<Statement>, ParserError> {
-        let mut parser = DFParser::new_with_dialect(sql, dialect)?;
+        let tokenizer = Tokenizer::new(dialect, sql);
+        let tokens = tokenizer.into_tokens().collect::<Result<_, _>>()?;
+        Self::parse_tokens_with_dialect(tokens, dialect)
+    }
+
+    /// Parse SQL tokens and produce one or more [`Statement`]s with
+    /// the specified dialect.
+    pub fn parse_tokens_with_dialect(
+        tokens: Vec<Token>,
+        dialect: &dyn Dialect,
+    ) -> Result<VecDeque<Statement>, ParserError> {
+        let mut parser = DFParser::from_dialect_and_tokens(dialect, tokens);
         let mut stmts = VecDeque::new();
         let mut expecting_statement_delimiter = false;
         loop {
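
For context, here is a minimal usage sketch (not part of the diff) of the token-based entry point this patch adds. It assumes the patch is applied; for simplicity it tokenizes with the stock `Tokenizer::tokenize()` from sqlparser rather than the fork's `into_tokens()` iterator used above, and the choice of `GenericDialect` and the two-statement input are illustrative only.

```rust
use datafusion_sql::parser::DFParser;
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dialect = GenericDialect {};
    let sql = "SELECT 1 AS a; SELECT 2 AS b";

    // Tokenize once up front; the caller can inspect or rewrite the
    // tokens before handing them to the parser.
    let tokens = Tokenizer::new(&dialect, sql).tokenize()?;

    // Parse the pre-tokenized input through the entry point added by this patch.
    let statements = DFParser::parse_tokens_with_dialect(tokens, &dialect)?;
    println!("parsed {} statement(s)", statements.len());
    Ok(())
}
```

The point of splitting tokenization from parsing is that callers who already hold a token stream (or want to preprocess it) no longer have to round-trip through a SQL string before reaching `DFParser`.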