From 17f2930885145b1080a61d2e5b6fbd9027a88844 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Mon, 28 Dec 2020 14:22:03 +0300 Subject: [PATCH 01/23] Introduce support for EXPLAIN [ANALYZE] [VERBOSE] syntax --- src/ast/mod.rs | 26 ++++++++++++++++ src/dialect/keywords.rs | 7 +++++ src/parser.rs | 14 +++++++++ src/tokenizer.rs | 62 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 58 +++++++++++++++++++++++++++++------- 5 files changed, 156 insertions(+), 11 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a726b299d..2d63bbfab 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -431,6 +431,15 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { + /// EXPLAIN + Explain { + /// Carry out the command and show actual run times and other statistics. + analyze: bool, + /// Display additional information regarding the plan. + verbose: bool, + /// A SQL query that specifies what to explain + statement: Box<Statement>, + }, /// SELECT Query(Box<Query>), /// INSERT @@ -591,6 +600,23 @@ impl fmt::Display for Statement { #[allow(clippy::cognitive_complexity)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Statement::Explain { + verbose, + analyze, + statement, + } => { + write!(f, "EXPLAIN ")?; + + if *analyze { + write!(f, "ANALYZE ")?; + } + + if *verbose { + write!(f, "VERBOSE ")?; + } + + write!(f, "{}", statement) + } Statement::Query(s) => write!(f, "{}", s), Statement::Insert { table_name, diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index d14534881..6e7065043 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -72,6 +72,7 @@ define_keywords!( ALL, ALLOCATE, ALTER, + ANALYZE, AND, ANY, APPLY, @@ -190,6 +191,7 @@ define_keywords!( EXECUTE, EXISTS, EXP, + EXPLAIN, EXTENDED, EXTERNAL, EXTRACT, @@ -443,6 +445,7 @@ define_keywords!( VARYING, VAR_POP, VAR_SAMP, + VERBOSE, VERSIONING, VIEW, VIRTUAL, @@ -465,6 +468,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ // Reserved as both a table and a column alias: Keyword::WITH, + Keyword::EXPLAIN, + Keyword::ANALYZE, Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, @@ -496,6 +501,8 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ // Reserved as both a table and a column alias: Keyword::WITH, + Keyword::EXPLAIN, + Keyword::ANALYZE, Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, diff --git a/src/parser.rs b/src/parser.rs index b40e94de7..0db093f93 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -131,6 +131,7 @@ impl<'a> Parser<'a> { pub fn parse_statement(&mut self) -> Result<Statement, ParserError> { match self.next_token() { Token::Word(w) => match w.keyword { + Keyword::EXPLAIN => Ok(self.parse_explain()?), Keyword::SELECT | Keyword::WITH | Keyword::VALUES => { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } @@ -1790,6 +1791,19 @@ impl<'a> Parser<'a> { }) } + pub fn parse_explain(&mut self) -> Result<Statement, ParserError> { + let analyze = self.parse_keyword(Keyword::ANALYZE); + let verbose = self.parse_keyword(Keyword::VERBOSE); + + let statement = Box::new(self.parse_statement()?); + + Ok(Statement::Explain { + analyze, + verbose, + statement, + }) + } + /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_...
methods, this one doesn't diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 70587f18b..bbad1a4c4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -734,6 +734,68 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_explain_select() { + let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("EXPLAIN"), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Mult, + Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("customer", None), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("WHERE"), + Token::Whitespace(Whitespace::Space), + Token::make_word("id", None), + Token::Whitespace(Whitespace::Space), + Token::Eq, + Token::Whitespace(Whitespace::Space), + Token::Number(String::from("1")), + ]; + + compare(expected, tokens); + } + + #[test] + fn tokenize_explain_analyze_select() { + let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("EXPLAIN"), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("ANALYZE"), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Mult, + Token::Whitespace(Whitespace::Space), + Token::make_keyword("FROM"), + Token::Whitespace(Whitespace::Space), + Token::make_word("customer", None), + Token::Whitespace(Whitespace::Space), + Token::make_keyword("WHERE"), + Token::Whitespace(Whitespace::Space), + Token::make_word("id", None), + Token::Whitespace(Whitespace::Space), + Token::Eq, + Token::Whitespace(Whitespace::Space), + Token::Number(String::from("1")), + ]; + + compare(expected, tokens); + } + #[test] fn tokenize_string_predicate() { let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7fba5dcb9..a311dd267 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -543,17 +543,23 @@ fn parse_is_not_null() { fn parse_not_precedence() { // NOT has higher precedence than OR/AND, so the following must parse as (NOT true) OR true let sql = "NOT true OR true"; - assert_matches!(verified_expr(sql), Expr::BinaryOp { - op: BinaryOperator::Or, - .. - }); + assert_matches!( + verified_expr(sql), + Expr::BinaryOp { + op: BinaryOperator::Or, + .. + } + ); // But NOT has lower precedence than comparison operators, so the following parses as NOT (a IS NULL) let sql = "NOT a IS NULL"; - assert_matches!(verified_expr(sql), Expr::UnaryOp { - op: UnaryOperator::Not, - .. - }); + assert_matches!( + verified_expr(sql), + Expr::UnaryOp { + op: UnaryOperator::Not, + .. + } + ); // NOT has lower precedence than BETWEEN, so the following parses as NOT (1 NOT BETWEEN 1 AND 2) let sql = "NOT 1 NOT BETWEEN 1 AND 2"; @@ -1463,7 +1469,7 @@ fn parse_create_external_table_lowercase() { lng DOUBLE) \ STORED AS PARQUET LOCATION '/tmp/example.csv'", ); - assert_matches!(ast, Statement::CreateTable{..}); + assert_matches!(ast, Statement::CreateTable { .. 
}); } #[test] @@ -1606,6 +1612,33 @@ fn parse_scalar_function_in_projection() { ); } +fn run_explain_analyze(query: &str, expected_verbose: bool, expected_analyze: bool) { + match verified_stmt(query) { + Statement::Explain { + analyze, + verbose, + statement, + } => { + assert_eq!(verbose, expected_verbose); + assert_eq!(analyze, expected_analyze); + assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); + } + _ => panic!("Unexpected Statement, must be Explain"), + } +} + +#[test] +fn parse_explain_analyze_with_simple_select() { + run_explain_analyze("EXPLAIN SELECT sqrt(id) FROM foo", false, false); + run_explain_analyze("EXPLAIN VERBOSE SELECT sqrt(id) FROM foo", true, false); + run_explain_analyze("EXPLAIN ANALYZE SELECT sqrt(id) FROM foo", false, true); + run_explain_analyze( + "EXPLAIN ANALYZE VERBOSE SELECT sqrt(id) FROM foo", + true, + true, + ); +} + #[test] fn parse_named_argument_function() { let sql = "SELECT FUN(a => '1', b => '2') FROM foo"; @@ -2554,11 +2587,14 @@ fn parse_multiple_statements() { #[test] fn parse_scalar_subqueries() { let sql = "(SELECT 1) + (SELECT 2)"; - assert_matches!(verified_expr(sql), Expr::BinaryOp { + assert_matches!( + verified_expr(sql), + Expr::BinaryOp { op: BinaryOperator::Plus, .. //left: box Subquery { .. }, //right: box Subquery { .. }, - }); + } + ); } #[test] From 94ff46802c4e0fd9ddd63667d89329c1ca59685d Mon Sep 17 00:00:00 2001 From: Daniël Heres Date: Mon, 28 Dec 2020 18:08:32 +0100 Subject: [PATCH 02/23] Support ANALYZE TABLE syntax (#285) * Support analyze table * Cleanup --- src/ast/mod.rs | 24 +++++++++++++++--------- src/parser.rs | 10 ++++++++++ tests/sqlparser_common.rs | 12 ++++++++++++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2d63bbfab..4232ad022 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -431,15 +431,6 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { - /// EXPLAIN - Explain { - /// Carry out the command and show actual run times and other statistics. - analyze: bool, - /// Display additional information regarding the plan. - verbose: bool, - /// A SQL query that specifies what to explain - statement: Box<Statement>, - }, /// SELECT Query(Box<Query>), /// INSERT @@ -592,6 +583,20 @@ pub enum Statement { data_types: Vec<DataType>, statement: Box<Statement>, }, + /// EXPLAIN + Explain { + /// Carry out the command and show actual run times and other statistics. + analyze: bool, + /// Display additional information regarding the plan. + verbose: bool, + /// A SQL query that specifies what to explain + statement: Box<Statement>, + }, + /// ANALYZE + Analyze { + /// Name of table + table_name: ObjectName, + }, } impl fmt::Display for Statement { @@ -617,6 +622,7 @@ impl fmt::Display for Statement { write!(f, "{}", statement) } + Statement::Analyze { table_name } => write!(f, "ANALYZE TABLE {}", table_name), Statement::Query(s) => write!(f, "{}", s), Statement::Insert { table_name, diff --git a/src/parser.rs b/src/parser.rs index 0db093f93..94afeb6e9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -132,6 +132,7 @@ impl<'a> Parser<'a> { match self.next_token() { Token::Word(w) => match w.keyword { Keyword::EXPLAIN => Ok(self.parse_explain()?), + Keyword::ANALYZE => Ok(self.parse_analyze()?), Keyword::SELECT | Keyword::WITH | Keyword::VALUES => { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } @@ -1804,6 +1805,15 @@ impl<'a> Parser<'a> { }) } + pub fn parse_analyze(&mut self) -> Result<Statement, ParserError> { + // ANALYZE TABLE table_name + self.expect_keyword(Keyword::TABLE)?; + + let table_name = self.parse_object_name()?; + + Ok(Statement::Analyze { table_name }) + } + /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a311dd267..e7d78f950 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1639,6 +1639,18 @@ fn parse_explain_analyze_with_simple_select() { ); } +#[test] +fn parse_simple_analyze() { + let sql = "ANALYZE TABLE t"; + let stmt = verified_stmt(sql); + assert_eq!( + stmt, + Statement::Analyze { + table_name: ObjectName(vec![Ident::new("t")]) + } + ); +} + #[test] fn parse_named_argument_function() { let sql = "SELECT FUN(a => '1', b => '2') FROM foo";
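Taken together, patches 01 and 02 make the following round trip possible. A minimal usage sketch, not part of the series itself: it assumes the 0.7-era entry point `Parser::parse_sql(&dyn Dialect, String)` (newer releases changed the argument to `&str`) and a throwaway `main` for illustration:

use sqlparser::ast::Statement;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let sql = "EXPLAIN ANALYZE VERBOSE SELECT sqrt(id) FROM foo";
    let ast = Parser::parse_sql(&dialect, sql.to_string()).unwrap();
    match &ast[0] {
        // Both modifiers surface as plain bools; the explained statement
        // keeps its own AST and can be re-serialized via Display.
        Statement::Explain { analyze, verbose, statement } => {
            assert!(*analyze && *verbose);
            assert_eq!(statement.to_string(), "SELECT sqrt(id) FROM foo");
        }
        other => panic!("expected Statement::Explain, got {:?}", other),
    }
}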
From e18e8dc67498348ab6a50c80400930876d71813a Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 20:31:14 +0100 Subject: [PATCH 03/23] Prepare 0.6.2 --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05dbfdfc4..cfc0a4893 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes. +## [0.6.2] + ### Changed - Change the MySQL dialect to support `` `identifiers` `` quoted with backticks instead of the standard `"double-quoted"` identifiers (#247) - thanks @mashuai! - Update bigdecimal requirement from 0.1 to 0.2 (#268) @@ -21,6 +23,10 @@ Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocume - Support PostgreSQL math operators (#267) - thanks @alex-dukhno! - Add SQLite dialect (#248) - thanks @mashuai! - Add Snowflake dialect (#259) - thanks @eyalleshem! +- Support for Recursive CTEs - thanks @rhanqtl! +- Support `FROM (table_name) alias` syntax - thanks @eyalleshem! +- Support for `EXPLAIN [ANALYZE] VERBOSE` - thanks @ovr! +- Support `ANALYZE TABLE` - DDL: - Support `OR REPLACE` in `CREATE VIEW`/`TABLE` (#239) - thanks @Dandandan! - Support specifying `ASC`/`DESC` in index columns (#249) - thanks @mashuai!
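The Display side of the new variants can be exercised without a parser at all; a small sketch against the AST types added above, using the same crate-root re-exports the tests rely on:

use sqlparser::ast::{Ident, ObjectName, Statement};

fn main() {
    // As of patch 02 the Analyze variant carries only the table name;
    // patch 11 below widens it with Hive-specific fields.
    let stmt = Statement::Analyze {
        table_name: ObjectName(vec![Ident::new("t")]),
    };
    // Matches what parse_simple_analyze() round-trips above.
    assert_eq!(stmt.to_string(), "ANALYZE TABLE t");
}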
From d66294fab80b88864a0197b02ba97db455d90c4b Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 20:32:11 +0100 Subject: [PATCH 04/23] Add date --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfc0a4893..7798d8d6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes. -## [0.6.2] +## [0.6.2] 2020-12-28 ### Changed - Change the MySQL dialect to support `` `identifiers` `` quoted with backticks instead of the standard `"double-quoted"` identifiers (#247) - thanks @mashuai! From 26c281eaf7f3b88828e54dec24b189af357aa2cc Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 20:35:21 +0100 Subject: [PATCH 05/23] (cargo-release) version 0.6.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b57cbb997..8e9724138 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.6.1" +version = "0.6.2" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9930bdff68c19e2af069c7f38025bb11656b6181 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 20:35:22 +0100 Subject: [PATCH 06/23] (cargo-release) start next development iteration 0.6.3-alpha.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8e9724138..9720a71a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.6.2" +version = "0.6.3-alpha.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 97cd1c017d590f8ba0476ab715c6383446da8084 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 21:48:07 +0100 Subject: [PATCH 07/23] Release 0.7.0 instead --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7798d8d6e..c1c5a763f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes. -## [0.6.2] 2020-12-28 +## [0.7.0] 2020-12-28 ### Changed - Change the MySQL dialect to support `` `identifiers` `` quoted with backticks instead of the standard `"double-quoted"` identifiers (#247) - thanks @mashuai! 
From e11b80ecf9c5cca087f9a225d07e574fbf170ade Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 21:51:28 +0100 Subject: [PATCH 08/23] (cargo-release) version 0.7.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9720a71a6..fa4abe183 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.6.3-alpha.0" +version = "0.7.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 200ed5ecfcd2b84fb1c190c057cf0bb7a5770b8a Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Mon, 28 Dec 2020 21:51:28 +0100 Subject: [PATCH 09/23] (cargo-release) start next development iteration 0.7.1-alpha.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fa4abe183..231d7fee5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.7.0" +version = "0.7.1-alpha.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 17f8eb9c5a7170aaee25b8dd92266a4d732ef2c5 Mon Sep 17 00:00:00 2001 From: joshwd36 Date: Thu, 7 Jan 2021 17:30:12 +0000 Subject: [PATCH 10/23] Fix clippy lints (#287) --- src/ast/ddl.rs | 2 +- src/ast/query.rs | 2 +- src/dialect/ansi.rs | 8 ++++---- src/dialect/generic.rs | 12 ++++++++---- src/dialect/mssql.rs | 12 ++++++++---- src/dialect/mysql.rs | 8 ++++---- src/dialect/postgresql.rs | 8 ++++---- src/dialect/snowflake.rs | 8 ++++---- src/dialect/sqlite.rs | 8 ++++---- 9 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 847ee71a3..272bf7c25 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -254,7 +254,7 @@ impl fmt::Display for ColumnOption { } } -fn display_constraint_name<'a>(name: &'a Option) -> impl fmt::Display + 'a { +fn display_constraint_name(name: &'_ Option) -> impl fmt::Display + '_ { struct ConstraintName<'a>(&'a Option); impl<'a> fmt::Display for ConstraintName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/ast/query.rs b/src/ast/query.rs index b28fa92a9..1b8ccf7e4 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -346,7 +346,7 @@ impl fmt::Display for Join { _ => "", } } - fn suffix<'a>(constraint: &'a JoinConstraint) -> impl fmt::Display + 'a { + fn suffix(constraint: &'_ JoinConstraint) -> impl fmt::Display + '_ { struct Suffix<'a>(&'a JoinConstraint); impl<'a> fmt::Display for Suffix<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index ca01fb751..1015ca2d3 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -17,13 +17,13 @@ pub struct AnsiDialect {} impl Dialect for AnsiDialect { fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '_' } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 104d3a9a3..818fa0d0a 100644 --- 
a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -17,13 +17,17 @@ pub struct GenericDialect; impl Dialect for GenericDialect { fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch == '#' || ch == '@' + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ch == '_' + || ch == '#' + || ch == '@' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '@' || ch == '$' || ch == '#' diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index c613a1502..539a17a9f 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -23,13 +23,17 @@ impl Dialect for MsSqlDialect { fn is_identifier_start(&self, ch: char) -> bool { // See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers // We don't support non-latin "letters" currently. - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch == '#' || ch == '@' + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ch == '_' + || ch == '#' + || ch == '@' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '@' || ch == '$' || ch == '#' diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index a4aaafe6b..6581195b8 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -20,15 +20,15 @@ impl Dialect for MySqlDialect { // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html. // We don't yet support identifiers beginning with numbers, as that // makes it hard to distinguish numeric literals. 
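// Context for the rewrites throughout this patch: clippy's
// manual_range_contains lint asks for RangeInclusive::contains instead of
// chained comparisons. The two forms are equivalent, e.g.:
//
//     assert_eq!(('a'..='z').contains(&'q'), 'q' >= 'a' && 'q' <= 'z');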
- (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) || ch == '_' || ch == '$' - || (ch >= '\u{0080}' && ch <= '\u{ffff}') + || ('\u{0080}'..='\u{ffff}').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') + self.is_identifier_start(ch) || ('0'..='9').contains(&ch) } fn is_delimited_identifier_start(&self, ch: char) -> bool { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 1c11d8a37..0c2eb99f0 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -20,13 +20,13 @@ impl Dialect for PostgreSqlDialect { // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // We don't yet support identifiers beginning with "letters with // diacritical marks and non-Latin letters" - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '$' || ch == '_' } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 22fd55fa5..93db95692 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -18,13 +18,13 @@ pub struct SnowflakeDialect; impl Dialect for SnowflakeDialect { // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html fn is_identifier_start(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_' } fn is_identifier_part(&self, ch: char) -> bool { - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || (ch >= '0' && ch <= '9') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) || ch == '$' || ch == '_' } diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 16ec66ac2..4ce2f834b 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -25,14 +25,14 @@ impl Dialect for SQLiteDialect { fn is_identifier_start(&self, ch: char) -> bool { // See https://www.sqlite.org/draft/tokenreq.html - (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) || ch == '_' || ch == '$' - || (ch >= '\u{007f}' && ch <= '\u{ffff}') + || ('\u{007f}'..='\u{ffff}').contains(&ch) } fn is_identifier_part(&self, ch: char) -> bool { - self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') + self.is_identifier_start(ch) || ('0'..='9').contains(&ch) } } From 8a214f99192435bd86b102db8002b78bb4187de2 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Thu, 4 Feb 2021 14:53:20 -0500 Subject: [PATCH 11/23] Implement Hive QL Parsing (#235) --- examples/cli.rs | 1 + src/ast/data_type.rs | 3 + src/ast/ddl.rs | 43 ++- src/ast/mod.rs | 389 ++++++++++++++++++++-- src/ast/operator.rs | 2 + src/ast/query.rs | 75 ++++- src/ast/value.rs | 9 +- src/dialect/hive.rs | 39 +++ src/dialect/keywords.rs | 38 +++ src/dialect/mod.rs | 2 + src/parser.rs | 635 ++++++++++++++++++++++++++++++------ src/test_utils.rs | 3 +- src/tokenizer.rs | 59 +++- tests/sqlparser_common.rs | 25 +- tests/sqlparser_hive.rs | 212 ++++++++++++ tests/sqlparser_postgres.rs | 17 +- 16 files changed, 1382 insertions(+), 170 deletions(-) create mode 100644 src/dialect/hive.rs create mode 100644 
tests/sqlparser_hive.rs diff --git a/examples/cli.rs b/examples/cli.rs index 5a3a3034b..9ac079949 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -40,6 +40,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), + "--hive" => Box::new(HiveDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 53122ab5d..388703e76 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,6 +61,8 @@ pub enum DataType { Regclass, /// Text Text, + /// String + String, /// Bytea Bytea, /// Custom type such as enums @@ -101,6 +103,7 @@ impl fmt::Display for DataType { DataType::Interval => write!(f, "INTERVAL"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), + DataType::String => write!(f, "STRING"), DataType::Bytea => write!(f, "BYTEA"), DataType::Array(ty) => write!(f, "{}[]", ty), DataType::Custom(ty) => write!(f, "{}", ty), diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 272bf7c25..67dc2e322 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -35,22 +35,54 @@ pub enum AlterTableOperation { if_exists: bool, cascade: bool, }, + /// `RENAME TO PARTITION (partition=val)` + RenamePartitions { + old_partitions: Vec, + new_partitions: Vec, + }, + /// Add Partitions + AddPartitions { + if_not_exists: bool, + new_partitions: Vec, + }, + DropPartitions { + partitions: Vec, + if_exists: bool, + }, /// `RENAME [ COLUMN ] TO ` RenameColumn { old_column_name: Ident, new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { table_name: ObjectName }, } impl fmt::Display for AlterTableOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions, + } => write!( + f, + "ADD{ine} PARTITION ({})", + display_comma_separated(new_partitions), + ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } + ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {}", c), AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } + AlterTableOperation::DropPartitions { + partitions, + if_exists, + } => write!( + f, + "DROP{ie} PARTITION ({})", + display_comma_separated(partitions), + ie = if *if_exists { " IF EXISTS" } else { "" } + ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { column_name, @@ -63,6 +95,15 @@ impl fmt::Display for AlterTableOperation { column_name, if *cascade { " CASCADE" } else { "" } ), + AlterTableOperation::RenamePartitions { + old_partitions, + new_partitions, + } => write!( + f, + "PARTITION ({}) RENAME TO PARTITION ({})", + display_comma_separated(old_partitions), + display_comma_separated(new_partitions) + ), AlterTableOperation::RenameColumn { old_column_name, new_column_name, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4232ad022..1999451d5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,8 +29,9 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select, - SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, With, + Cte, Fetch, Join, 
JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr, + Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, + Values, With, }; pub use self::value::{DateTimeField, Value}; @@ -191,7 +192,10 @@ pub enum Expr { right: Box, }, /// Unary operation e.g. `NOT foo` - UnaryOp { op: UnaryOperator, expr: Box }, + UnaryOp { + op: UnaryOperator, + expr: Box, + }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { expr: Box, @@ -213,7 +217,14 @@ pub enum Expr { /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). - TypedString { data_type: DataType, value: String }, + TypedString { + data_type: DataType, + value: String, + }, + MapAccess { + column: Box, + key: String, + }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// `CASE [] WHEN THEN ... [ELSE ] END` @@ -241,6 +252,7 @@ impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{}", s), + Expr::MapAccess { column, key } => write!(f, "{}[\"{}\"]", column, key), Expr::Wildcard => f.write_str("*"), Expr::QualifiedWildcard(q) => write!(f, "{}.*", display_separated(q, ".")), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), @@ -426,11 +438,50 @@ impl fmt::Display for WindowFrameBound { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum AddDropSync { + ADD, + DROP, + SYNC, +} + +impl fmt::Display for AddDropSync { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AddDropSync::SYNC => f.write_str("SYNC PARTITIONS"), + AddDropSync::DROP => f.write_str("DROP PARTITIONS"), + AddDropSync::ADD => f.write_str("ADD PARTITIONS"), + } + } +} + /// A top-level statement (SELECT, INSERT, CREATE, etc.) 
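// The hunks below widen Statement with Hive-oriented variants (Analyze,
// Truncate, Msck, Directory, CreateDatabase) and add Hive-specific fields
// to the existing Insert and CreateTable variants.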
#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { + /// Analyze (Hive) + Analyze { + table_name: ObjectName, + partitions: Option>, + for_columns: bool, + columns: Vec, + cache_metadata: bool, + noscan: bool, + compute_statistics: bool, + }, + /// Truncate (Hive) + Truncate { + table_name: ObjectName, + partitions: Option>, + }, + /// Msck (Hive) + Msck { + table_name: ObjectName, + repair: bool, + partition_action: Option, + }, /// SELECT Query(Box), /// INSERT @@ -439,8 +490,24 @@ pub enum Statement { table_name: ObjectName, /// COLUMNS columns: Vec, + /// Overwrite (Hive) + overwrite: bool, /// A SQL query that specifies what to insert source: Box, + /// partitioned insert (Hive) + partitioned: Option>, + /// Columns defined after PARTITION + after_columns: Vec, + /// whether the insert has the table keyword (Hive) + table: bool, + }, + // TODO: Support ROW FORMAT + Directory { + overwrite: bool, + local: bool, + path: String, + file_format: Option, + source: Box, }, Copy { /// TABLE @@ -479,6 +546,7 @@ pub enum Statement { /// CREATE TABLE CreateTable { or_replace: bool, + temporary: bool, external: bool, if_not_exists: bool, /// Table name @@ -486,11 +554,15 @@ pub enum Statement { /// Optional schema columns: Vec, constraints: Vec, + hive_distribution: HiveDistributionStyle, + hive_formats: Option, + table_properties: Vec, with_options: Vec, file_format: Option, location: Option, query: Option>, without_rowid: bool, + like: Option, }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -525,6 +597,9 @@ pub enum Statement { /// Whether `CASCADE` was specified. This will be `false` when /// `RESTRICT` or no drop behavior at all was specified. cascade: bool, + /// Hive allows you specify whether the table's stored data will be + /// deleted along with the dropped table + purge: bool, }, /// SET /// @@ -533,8 +608,9 @@ pub enum Statement { /// supported yet. SetVariable { local: bool, + hivevar: bool, variable: Ident, - value: SetVariableValue, + value: Vec, }, /// SHOW /// @@ -562,6 +638,13 @@ pub enum Statement { schema_name: ObjectName, if_not_exists: bool, }, + /// CREATE DATABASE + CreateDatabase { + db_name: ObjectName, + if_not_exists: bool, + location: Option, + managed_location: Option, + }, /// `ASSERT [AS ]` Assert { condition: Expr, @@ -592,11 +675,6 @@ pub enum Statement { /// A SQL query that specifies what to explain statement: Box, }, - /// ANALYZE - Analyze { - /// Name of table - table_name: ObjectName, - }, } impl fmt::Display for Statement { @@ -622,17 +700,114 @@ impl fmt::Display for Statement { write!(f, "{}", statement) } - Statement::Analyze { table_name } => write!(f, "ANALYZE TABLE {}", table_name), Statement::Query(s) => write!(f, "{}", s), + Statement::Directory { + overwrite, + local, + path, + file_format, + source, + } => { + write!( + f, + "INSERT{overwrite}{local} DIRECTORY '{path}'", + overwrite = if *overwrite { " OVERWRITE" } else { "" }, + local = if *local { " LOCAL" } else { "" }, + path = path + )?; + if let Some(ref ff) = file_format { + write!(f, " STORED AS {}", ff)? 
+ } + write!(f, " {}", source) + } + Statement::Msck { + table_name, + repair, + partition_action, + } => { + write!( + f, + "MSCK {repair}TABLE {table}", + repair = if *repair { "REPAIR " } else { "" }, + table = table_name + )?; + if let Some(pa) = partition_action { + write!(f, " {}", pa)?; + } + Ok(()) + } + Statement::Truncate { + table_name, + partitions, + } => { + write!(f, "TRUNCATE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + Ok(()) + } + Statement::Analyze { + table_name, + partitions, + for_columns, + columns, + cache_metadata, + noscan, + compute_statistics, + } => { + write!(f, "ANALYZE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + + if *compute_statistics { + write!(f, " COMPUTE STATISTICS")?; + } + if *noscan { + write!(f, " NOSCAN")?; + } + if *cache_metadata { + write!(f, " CACHE METADATA")?; + } + if *for_columns { + write!(f, " FOR COLUMNS")?; + if !columns.is_empty() { + write!(f, " {}", display_comma_separated(columns))?; + } + } + Ok(()) + } Statement::Insert { table_name, + overwrite, + partitioned, columns, + after_columns, source, + table, } => { - write!(f, "INSERT INTO {} ", table_name)?; + write!( + f, + "INSERT {act}{tbl} {table_name} ", + table_name = table_name, + act = if *overwrite { "OVERWRITE" } else { "INTO" }, + tbl = if *table { " TABLE" } else { "" } + )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } + if let Some(ref parts) = partitioned { + if !parts.is_empty() { + write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; + } + } + if !after_columns.is_empty() { + write!(f, "({}) ", display_comma_separated(after_columns))?; + } write!(f, "{}", source) } Statement::Copy { @@ -684,6 +859,25 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateDatabase { + db_name, + if_not_exists, + location, + managed_location, + } => { + write!(f, "CREATE")?; + if *if_not_exists { + write!(f, " IF NOT EXISTS")?; + } + write!(f, " {}", db_name)?; + if let Some(l) = location { + write!(f, " LOCATION '{}'", l)?; + } + if let Some(ml) = managed_location { + write!(f, " MANAGEDLOCATION '{}'", ml)?; + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -711,14 +905,19 @@ impl fmt::Display for Statement { name, columns, constraints, + table_properties, with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats, external, + temporary, file_format, location, query, without_rowid, + like, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -729,10 +928,11 @@ impl fmt::Display for Statement { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{temporary}TABLE {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, external = if *external { "EXTERNAL " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if *temporary { "TEMPORARY " } else { "" }, name = name, )?; if !columns.is_empty() || !constraints.is_empty() { @@ -741,7 +941,7 @@ impl fmt::Display for Statement { write!(f, ", ")?; } write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() { + } else if query.is_none() && like.is_none() { // PostgreSQL allows `CREATE 
TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -749,6 +949,79 @@ impl fmt::Display for Statement { if *without_rowid { write!(f, " WITHOUT ROWID")?; } + + // Only for Hive + if let Some(l) = like { + write!(f, " LIKE {}", l)?; + } + match hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?; + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {} BUCKETS", num_buckets)?; + } + } + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({})) ON ({})", + display_comma_separated(&columns), + display_comma_separated(&on) + )?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + } + _ => (), + } + + if let Some(HiveFormat { + row_format, + storage, + location, + }) = hive_formats + { + match row_format { + Some(HiveRowFormat::SERDE { class }) => { + write!(f, " ROW FORMAT SERDE '{}'", class)? + } + Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, + None => (), + } + match storage { + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => write!( + f, + " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", + input_format, output_format + )?, + Some(HiveIOFormat::FileFormat { format }) if !*external => { + write!(f, " STORED AS {}", format)? + } + _ => (), + } + if !*external { + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } + } + } if *external { write!( f, @@ -757,6 +1030,13 @@ impl fmt::Display for Statement { location.as_ref().unwrap() )?; } + if !table_properties.is_empty() { + write!( + f, + " TBLPROPERTIES ({})", + display_comma_separated(table_properties) + )?; + } if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; } @@ -806,25 +1086,34 @@ impl fmt::Display for Statement { if_exists, names, cascade, + purge, } => write!( f, - "DROP {}{} {}{}", + "DROP {}{} {}{}{}", object_type, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), if *cascade { " CASCADE" } else { "" }, + if *purge { " PURGE" } else { "" } ), Statement::SetVariable { local, variable, + hivevar, value, - } => write!( - f, - "SET{local} {variable} = {value}", - local = if *local { " LOCAL" } else { "" }, - variable = variable, - value = value - ), + } => { + f.write_str("SET ")?; + if *local { + f.write_str("LOCAL ")?; + } + write!( + f, + "{hivevar}{name} = {value}", + hivevar = if *hivevar { "HIVEVAR:" } else { "" }, + name = variable, + value = display_comma_separated(value) + ) + } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { extended, @@ -1086,6 +1375,62 @@ impl fmt::Display for ObjectType { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveDistributionStyle { + PARTITIONED { + columns: Vec, + }, + CLUSTERED { + columns: Vec, + sorted_by: Vec, + num_buckets: i32, + }, + SKEWED { + columns: Vec, + on: Vec, + stored_as_directories: bool, + }, + NONE, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveRowFormat { + SERDE { class: String }, + DELIMITED, +} + 
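// For example, Hive's
//     ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
// parses to HiveRowFormat::SERDE with the class string as payload, while
// ROW FORMAT DELIMITED carries no payload.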
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveIOFormat { + IOF { + input_format: Expr, + output_format: Expr, + }, + FileFormat { + format: FileFormat, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct HiveFormat { + pub row_format: Option, + pub storage: Option, + pub location: Option, +} + +impl Default for HiveFormat { + fn default() -> Self { + HiveFormat { + row_format: None, + location: None, + storage: None, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct SqlOption { diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 57e70982f..732c81232 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -65,6 +65,7 @@ pub enum BinaryOperator { Lt, GtEq, LtEq, + Spaceship, Eq, NotEq, And, @@ -92,6 +93,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Lt => "<", BinaryOperator::GtEq => ">=", BinaryOperator::LtEq => "<=", + BinaryOperator::Spaceship => "<=>", BinaryOperator::Eq => "=", BinaryOperator::NotEq => "<>", BinaryOperator::And => "AND", diff --git a/src/ast/query.rs b/src/ast/query.rs index 1b8ccf7e4..8f9ab499d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -57,6 +57,7 @@ impl fmt::Display for Query { /// A node in a tree, representing a "query body" expression, roughly: /// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum SetExpr { @@ -73,6 +74,7 @@ pub enum SetExpr { right: Box, }, Values(Values), + Insert(Statement), // TODO: ANSI SQL supports `TABLE` here. 
} @@ -82,6 +84,7 @@ impl fmt::Display for SetExpr { SetExpr::Select(s) => write!(f, "{}", s), SetExpr::Query(q) => write!(f, "({})", q), SetExpr::Values(v) => write!(f, "{}", v), + SetExpr::Insert(v) => write!(f, "{}", v), SetExpr::SetOperation { left, right, @@ -126,10 +129,18 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, + /// LATERAL VIEWs + pub lateral_views: Vec, /// WHERE pub selection: Option, /// GROUP BY pub group_by: Vec, + /// CLUSTER BY (Hive) + pub cluster_by: Vec, + /// DISTRIBUTE BY (Hive) + pub distribute_by: Vec, + /// SORT BY (Hive) + pub sort_by: Vec, /// HAVING pub having: Option, } @@ -144,12 +155,34 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } + if !self.lateral_views.is_empty() { + for lv in &self.lateral_views { + write!(f, "{}", lv)?; + } + } if let Some(ref selection) = self.selection { write!(f, " WHERE {}", selection)?; } if !self.group_by.is_empty() { write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; } + if !self.cluster_by.is_empty() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(&self.cluster_by) + )?; + } + if !self.distribute_by.is_empty() { + write!( + f, + " DISTRIBUTE BY {}", + display_comma_separated(&self.distribute_by) + )?; + } + if !self.sort_by.is_empty() { + write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } @@ -157,6 +190,40 @@ impl fmt::Display for Select { } } +/// A hive LATERAL VIEW with potential column aliases +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LateralView { + /// LATERAL VIEW + pub lateral_view: Expr, + /// LATERAL VIEW table name + pub lateral_view_name: ObjectName, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Vec, + /// LATERAL VIEW OUTER + pub outer: bool, +} + +impl fmt::Display for LateralView { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + " LATERAL VIEW{outer} {} {}", + self.lateral_view, + self.lateral_view_name, + outer = if self.outer { " OUTER" } else { "" } + )?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct With { @@ -184,11 +251,16 @@ impl fmt::Display for With { pub struct Cte { pub alias: TableAlias, pub query: Query, + pub from: Option, } impl fmt::Display for Cte { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} AS ({})", self.alias, self.query) + write!(f, "{} AS ({})", self.alias, self.query)?; + if let Some(ref fr) = self.from { + write!(f, " FROM {}", fr)?; + } + Ok(()) } } @@ -417,6 +489,7 @@ pub enum JoinConstraint { On(Expr), Using(Vec), Natural, + None, } /// An `ORDER BY` expression diff --git a/src/ast/value.rs b/src/ast/value.rs index 9e82c175d..2afdfaeae 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -22,15 +22,17 @@ use std::fmt; pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] - Number(String), + Number(String, bool), #[cfg(feature = "bigdecimal")] - Number(BigDecimal), + Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), /// N'string value' NationalStringLiteral(String), /// X'hex value' HexStringLiteral(String), + + DoubleQuotedString(String), /// 
Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -59,7 +61,8 @@ pub enum Value { impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Value::Number(v) => write!(f, "{}", v), + Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs new file mode 100644 index 000000000..9b42857ec --- /dev/null +++ b/src/dialect/hive.rs @@ -0,0 +1,39 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct HiveDialect {} + +impl Dialect for HiveDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + (ch == '"') || (ch == '`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '$' + } + + fn is_identifier_part(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '_' + || ch == '$' + || ch == '{' + || ch == '}' + } +} diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 6e7065043..306cd19d6 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -103,6 +103,7 @@ define_keywords!( BOTH, BY, BYTEA, + CACHE, CALL, CALLED, CARDINALITY, @@ -120,6 +121,7 @@ define_keywords!( CHECK, CLOB, CLOSE, + CLUSTER, COALESCE, COLLATE, COLLECT, @@ -127,6 +129,7 @@ define_keywords!( COLUMNS, COMMIT, COMMITTED, + COMPUTE, CONDITION, CONNECT, CONSTRAINT, @@ -157,6 +160,7 @@ define_keywords!( CURRENT_USER, CURSOR, CYCLE, + DATABASE, DATE, DAY, DEALLOCATE, @@ -165,13 +169,16 @@ define_keywords!( DECLARE, DEFAULT, DELETE, + DELIMITED, DENSE_RANK, DEREF, DESC, DESCRIBE, DETERMINISTIC, + DIRECTORY, DISCONNECT, DISTINCT, + DISTRIBUTE, DOUBLE, DROP, DYNAMIC, @@ -206,6 +213,7 @@ define_keywords!( FOLLOWING, FOR, FOREIGN, + FORMAT, FRAME_ROW, FREE, FROM, @@ -220,6 +228,7 @@ define_keywords!( GROUPS, HAVING, HEADER, + HIVEVAR, HOLD, HOUR, IDENTITY, @@ -229,6 +238,7 @@ define_keywords!( INDICATOR, INNER, INOUT, + INPUTFORMAT, INSENSITIVE, INSERT, INT, @@ -262,11 +272,13 @@ define_keywords!( LOCALTIMESTAMP, LOCATION, LOWER, + MANAGEDLOCATION, MATCH, MATERIALIZED, MAX, MEMBER, MERGE, + METADATA, METHOD, MIN, MINUTE, @@ -274,6 +286,7 @@ define_keywords!( MODIFIES, MODULE, MONTH, + MSCK, MULTISET, NATIONAL, NATURAL, @@ -284,6 +297,7 @@ define_keywords!( NO, NONE, NORMALIZE, + NOSCAN, NOT, NTH_VALUE, NTILE, @@ -305,13 +319,17 @@ define_keywords!( ORDER, OUT, OUTER, + OUTPUTFORMAT, OVER, OVERFLOW, OVERLAPS, OVERLAY, + OVERWRITE, PARAMETER, PARQUET, PARTITION, + PARTITIONED, + PARTITIONS, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ 
-327,6 +345,7 @@ define_keywords!( PREPARE, PRIMARY, PROCEDURE, + PURGE, RANGE, RANK, RCFILE, @@ -349,6 +368,7 @@ define_keywords!( REGR_SYY, RELEASE, RENAME, + REPAIR, REPEATABLE, REPLACE, RESTRICT, @@ -372,6 +392,7 @@ define_keywords!( SELECT, SENSITIVE, SEQUENCEFILE, + SERDE, SERIALIZABLE, SESSION, SESSION_USER, @@ -380,6 +401,7 @@ define_keywords!( SIMILAR, SMALLINT, SOME, + SORT, SPECIFIC, SPECIFICTYPE, SQL, @@ -389,21 +411,27 @@ define_keywords!( SQRT, START, STATIC, + STATISTICS, STDDEV_POP, STDDEV_SAMP, STDIN, STORED, + STRING, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, SUCCEEDS, SUM, SYMMETRIC, + SYNC, SYSTEM, SYSTEM_TIME, SYSTEM_USER, TABLE, TABLESAMPLE, + TBLPROPERTIES, + TEMP, + TEMPORARY, TEXT, TEXTFILE, THEN, @@ -473,9 +501,12 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -492,6 +523,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::RIGHT, Keyword::NATURAL, Keyword::USING, + Keyword::CLUSTER, + Keyword::DISTRIBUTE, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -506,15 +539,20 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, Keyword::UNION, Keyword::EXCEPT, Keyword::INTERSECT, + Keyword::CLUSTER, + Keyword::DISTRIBUTE, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e656ab269..c7041ad93 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -12,6 +12,7 @@ mod ansi; mod generic; +mod hive; pub mod keywords; mod mssql; mod mysql; @@ -24,6 +25,7 @@ use std::fmt::Debug; pub use self::ansi::AnsiDialect; pub use self::generic::GenericDialect; +pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; diff --git a/src/parser.rs b/src/parser.rs index 94afeb6e9..7a0b23101 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -48,12 +48,14 @@ pub enum IsOptional { Optional, Mandatory, } + use IsOptional::*; pub enum IsLateral { Lateral, NotLateral, } + use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; @@ -137,6 +139,8 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } + Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -169,6 +173,104 @@ impl<'a> Parser<'a> { } } + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + }) + .unwrap_or_default(); + Ok(Statement::Msck { + repair, + table_name, + partition_action, + 
}) + } + + pub fn parse_truncate(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Ok(Statement::Truncate { + table_name, + partitions, + }) + } + + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + let mut columns = vec![]; + loop { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { + Some(Keyword::PARTITION) => { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(Parser::parse_identifier) + }) + .unwrap_or_default(); + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break, + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + columns, + partitions, + cache_metadata, + noscan, + compute_statistics, + }) + } + /// Parse a new expression pub fn parse_expr(&mut self) -> Result { self.parse_subexpr(0) @@ -182,6 +284,7 @@ impl<'a> Parser<'a> { loop { let next_precedence = self.get_next_precedence()?; debug!("next precedence: {:?}", next_precedence); + if precedence >= next_precedence { break; } @@ -316,13 +419,14 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } - Token::Number(_) + Token::Number(_, _) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::LParen => { let expr = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { @@ -334,7 +438,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => self.expected("an expression", unexpected), + unexpected => self.expected("an expression:", unexpected), }?; if self.parse_keyword(Keyword::COLLATE) { @@ -665,6 +769,8 @@ impl<'a> Parser<'a> { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::Spaceship => Some(BinaryOperator::Spaceship), + Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => Some(BinaryOperator::Gt), @@ -744,12 +850,27 @@ impl<'a> Parser<'a> { op: UnaryOperator::PGPostfixFactorial, expr: Box::new(expr), }) + } else if Token::LBracket == tok { + self.parse_map_access(expr) } else { // Can only happen if `get_next_precedence` got out of sync with this function panic!("No infix parser for token {:?}", tok) } } + pub fn parse_map_access(&mut self, expr: Expr) -> Result { + let key = self.parse_literal_string()?; + let tok = self.consume_token(&Token::RBracket); + 
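// Note: consume_token returns a bool (whether the expected token was
// present), so a missing closing `]` is tolerated silently here rather
// than reported as a parse error.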
debug!("Tok: {}", tok); + match expr { + e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { + column: Box::new(e), + key, + }), + _ => Ok(expr), + } + } + /// Parses the parens following the `[ NOT ] IN` operator pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { self.expect_token(&Token::LParen)?; @@ -820,7 +941,14 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), @@ -828,6 +956,7 @@ impl<'a> Parser<'a> { Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), Token::ExclamationMark => Ok(50), + Token::LBracket | Token::RBracket => Ok(10), _ => Ok(0), } } @@ -911,7 +1040,7 @@ impl<'a> Parser<'a> { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - //println!("parse_keywords aborting .. did not find {}", keyword); + // println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -1034,8 +1163,11 @@ impl<'a> Parser<'a> { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace) + self.parse_create_table(or_replace, temporary) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); self.parse_create_view(or_replace) @@ -1088,31 +1220,67 @@ impl<'a> Parser<'a> { }) } + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name()?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) 
+ } + _ => break, + } + } + Ok(Statement::CreateDatabase { + db_name, + if_not_exists: ine, + location, + managed_location, + }) + } + pub fn parse_create_external_table( &mut self, or_replace: bool, ) -> Result { self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; - self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; - let file_format = self.parse_file_format()?; - self.expect_keyword(Keyword::LOCATION)?; - let location = self.parse_literal_string()?; + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + let file_format = if let Some(ff) = &hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(format.clone()), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, constraints, + hive_distribution, + hive_formats: Some(hive_formats), with_options: vec![], + table_properties, or_replace, - if_not_exists: false, + if_not_exists, external: true, - file_format: Some(file_format), - location: Some(location), + temporary: false, + file_format, + location, query: None, without_rowid: false, + like: None, }) } @@ -1139,7 +1307,7 @@ impl<'a> Parser<'a> { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = self.parse_with_options()?; + let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. 
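
For reference, a minimal sketch of the CREATE DATABASE form the code above makes parseable. This is hypothetical test code: it assumes the DATABASE branch of parse_create (not shown in this hunk) dispatches to parse_create_database, and it reuses the hive() helper defined later in tests/sqlparser_hive.rs:

#[test]
fn create_database_sketch() {
    // LOCATION and MANAGEDLOCATION are each optional and accepted in either
    // order while parsing; round-tripping via verified_stmt assumes the
    // Display impl prints them in this order.
    let sql = "CREATE DATABASE IF NOT EXISTS mydb LOCATION '/warehouse' MANAGEDLOCATION '/managed'";
    hive().verified_stmt(sql);
}
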
@@ -1171,6 +1339,7 @@ impl<'a> Parser<'a> { let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); } @@ -1179,6 +1348,7 @@ impl<'a> Parser<'a> { if_exists, names, cascade, + purge, }) } @@ -1199,18 +1369,85 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_table(&mut self, or_replace: bool) -> Result { + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } + + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; + let like = if self.parse_keyword(Keyword::LIKE) { + self.parse_object_name().ok() + } else { + None + }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_with_options()?; - + let with_options = self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) @@ -1220,16 +1457,21 @@ impl<'a> Parser<'a> { Ok(Statement::CreateTable { name: table_name, + temporary, columns, constraints, with_options, + table_properties, or_replace, if_not_exists, + hive_distribution, + hive_formats: Some(hive_formats), external: false, file_format: None, location: 
None, query, without_rowid, + like, }) } @@ -1423,8 +1665,8 @@ impl<'a> Parser<'a> { } } - pub fn parse_with_options(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::WITH) { + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Parser::parse_sql_option)?; self.expect_token(&Token::RParen)?; @@ -1449,13 +1691,25 @@ impl<'a> Parser<'a> { if let Some(constraint) = self.parse_optional_table_constraint()? { AlterTableOperation::AddConstraint(constraint) } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { column_def } + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { column_def } + } } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_identifier()?; + let table_name = self.parse_object_name()?; AlterTableOperation::RenameTable { table_name } } else { let _ = self.parse_keyword(Keyword::COLUMN); @@ -1468,17 +1722,51 @@ impl<'a> Parser<'a> { } } } else if self.parse_keyword(Keyword::DROP) { - let _ = self.parse_keyword(Keyword::COLUMN); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } } else { - return self.expected("ADD, RENAME, or DROP after ALTER TABLE", self.peek_token()); + return self.expected( + "ADD, RENAME, PARTITION or DROP after ALTER TABLE", + 
self.peek_token(), + ); }; Ok(Statement::AlterTable { name: table_name, @@ -1545,13 +1833,18 @@ impl<'a> Parser<'a> { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected("A value?", Token::Word(w))?, + }, _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n) => match n.parse() { - Ok(n) => Ok(Value::Number(n)), + Token::Number(ref n, l) => match n.parse() { + Ok(n) => Ok(Value::Number(n, l)), Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), @@ -1563,7 +1856,7 @@ impl<'a> Parser<'a> { pub fn parse_number_value(&mut self) -> Result { match self.parse_value()? { - v @ Value::Number(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), _ => { self.prev_token(); self.expected("literal number", self.peek_token()) @@ -1574,7 +1867,7 @@ impl<'a> Parser<'a> { /// Parse an unsigned literal integer/long pub fn parse_literal_uint(&mut self) -> Result { match self.next_token() { - Token::Number(s) => s.parse::().map_err(|e| { + Token::Number(s, _) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e)) }), unexpected => self.expected("literal int", unexpected), @@ -1584,6 +1877,7 @@ impl<'a> Parser<'a> { /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { + Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), unexpected => self.expected("literal string", unexpected), } @@ -1632,6 +1926,7 @@ impl<'a> Parser<'a> { // parse_interval_literal for a taste. Keyword::INTERVAL => Ok(DataType::Interval), Keyword::REGCLASS => Ok(DataType::Regclass), + Keyword::STRING => Ok(DataType::String), Keyword::TEXT => { if self.consume_token(&Token::LBracket) { // Note: this is postgresql-specific @@ -1730,6 +2025,7 @@ impl<'a> Parser<'a> { pub fn parse_identifier(&mut self) -> Result { match self.next_token() { Token::Word(w) => Ok(w.to_ident()), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), unexpected => self.expected("identifier", unexpected), } } @@ -1805,15 +2101,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_analyze(&mut self) -> Result { - // ANALYZE TABLE table_name - self.expect_keyword(Keyword::TABLE)?; - - let table_name = self.parse_object_name()?; - - Ok(Statement::Analyze { table_name }) - } - /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceeded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't @@ -1828,53 +2115,88 @@ impl<'a> Parser<'a> { None }; - let body = self.parse_query_body(0)?; + if !self.parse_keyword(Keyword::INSERT) { + let body = self.parse_query_body(0)?; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? 
+ } else { + vec![] + }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? - } else { - None - }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_offset()?) - } else { - None - }; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; - let fetch = if self.parse_keyword(Keyword::FETCH) { - Some(self.parse_fetch()?) - } else { - None - }; + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; - Ok(Query { - with, - body, - limit, - order_by, - offset, - fetch, - }) + Ok(Query { + with, + body, + limit, + order_by, + offset, + fetch, + }) + } else { + let insert = self.parse_insert()?; + Ok(Query { + with, + body: SetExpr::Insert(insert), + limit: None, + order_by: vec![], + offset: None, + fetch: None, + }) + } } /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) fn parse_cte(&mut self) -> Result { - let alias = TableAlias { - name: self.parse_identifier()?, - columns: self.parse_parenthesized_column_list(Optional)?, + let name = self.parse_identifier()?; + + let mut cte = if self.parse_keyword(Keyword::AS) { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + } + } else { + let columns = self.parse_parenthesized_column_list(Optional)?; + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { name, columns }; + Cte { + alias, + query, + from: None, + } }; - self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Cte { alias, query }) + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier()?); + } + Ok(cte) } /// Parse a "query body", which is an expression with roughly the @@ -1962,6 +2284,37 @@ impl<'a> Parser<'a> { } else { vec![] }; + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name()?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -1975,6 +2328,24 @@ impl<'a> Parser<'a> { vec![] }; + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? 
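+ // CLUSTER BY, DISTRIBUTE BY and SORT BY are Hive-specific clauses; SORT BY
+ // only orders rows within each reducer, unlike the global ORDER BY.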
+ } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -1987,26 +2358,42 @@ impl<'a> Parser<'a> { projection, from, selection, + lateral_views, group_by, + cluster_by, + distribute_by, + sort_by, having, }) } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL]); + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; - Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - variable, - value, - }) + let mut values = vec![]; + loop { + let token = self.peek_token(); + let value = match (self.parse_value(), token) { + (Ok(value), _) => SetVariableValue::Literal(value), + (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), unexpected) => self.expected("variable value", unexpected)?, + }; + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + }); + } } else if variable.value == "TRANSACTION" && modifier.is_none() { Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, @@ -2119,7 +2506,7 @@ impl<'a> Parser<'a> { } } Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()) + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); } _ if natural => { return self.expected("a join type after NATURAL", self.peek_token()); @@ -2290,21 +2677,61 @@ impl<'a> Parser<'a> { let columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(JoinConstraint::Using(columns)) } else { - self.expected("ON, or USING after JOIN", self.peek_token()) + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) } } /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { - self.expect_keyword(Keyword::INTO)?; - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; - let source = Box::new(self.parse_query()?); - Ok(Statement::Insert { - table_name, - columns, - source, - }) + let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; + let overwrite = action == Keyword::OVERWRITE; + let local = self.parse_keyword(Keyword::LOCAL); + + if self.parse_keyword(Keyword::DIRECTORY) { + let path = self.parse_literal_string()?; + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) 
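+ // This branch covers Hive's INSERT OVERWRITE [LOCAL] DIRECTORY '...'
+ // STORED AS <format>, exercised by the create_local_directory test
+ // added later in this series.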
+ } else { + None + }; + let source = Box::new(self.parse_query()?); + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source, + }) + } else { + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional)?; + + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + r + } else { + None + }; + + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + + let source = Box::new(self.parse_query()?); + Ok(Statement::Insert { + table_name, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + }) + } } pub fn parse_update(&mut self) -> Result { diff --git a/src/test_utils.rs b/src/test_utils.rs index 2fcacffa9..160d2c110 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -132,6 +132,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), ], } } @@ -153,7 +154,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } pub fn number(n: &'static str) -> Value { - Value::Number(n.parse().unwrap()) + Value::Number(n.parse().unwrap(), false) } pub fn table_alias(name: impl Into) -> Option { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bbad1a4c4..fd33f9589 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ pub enum Token { /// A keyword (like SELECT) or an optionally quoted SQL identifier Word(Word), /// An unsigned numeric literal - Number(String), + Number(String, bool), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' @@ -48,6 +48,8 @@ pub enum Token { Comma, /// Whitespace (space, tab, etc) Whitespace(Whitespace), + /// Double equals sign `==` + DoubleEq, /// Equality operator `=` Eq, /// Not Equals operator `<>` (or `!=` in some dialects) @@ -60,6 +62,8 @@ pub enum Token { LtEq, /// Greater Than Or Equals operator `>=` GtEq, + /// Spaceship operator <=> + Spaceship, /// Plus operator `+` Plus, /// Minus operator `-` @@ -127,13 +131,15 @@ impl fmt::Display for Token { match self { Token::EOF => f.write_str("EOF"), Token::Word(ref w) => write!(f, "{}", w), - Token::Number(ref n) => f.write_str(n), + Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), + Token::DoubleEq => f.write_str("=="), + Token::Spaceship => f.write_str("<=>"), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), @@ -296,7 +302,7 @@ impl<'a> Tokenizer<'a> { Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64, Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, - Token::Number(s) => self.col += s.len() as u64, + Token::Number(s, _) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ 
-358,6 +364,15 @@ impl<'a> Tokenizer<'a> { ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume the first char let s = self.tokenize_word(ch, chars); + + if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| { + matches!(ch, '0'..='9' | '.') + }); + let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); + s += s2.as_str(); + return Ok(Some(Token::Number(s, false))); + } Ok(Some(Token::make_word(&s, None))) } // string @@ -383,7 +398,13 @@ impl<'a> Tokenizer<'a> { '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); - Ok(Some(Token::Number(s))) + let long = if chars.peek() == Some(&'L') { + chars.next(); + true + } else { + false + }; + Ok(Some(Token::Number(s, long))) } // punctuation '(' => self.consume_and_return(chars, Token::LParen), @@ -461,7 +482,13 @@ impl<'a> Tokenizer<'a> { '<' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::LtEq), + Some('=') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_and_return(chars, Token::Spaceship), + _ => Ok(Some(Token::LtEq)), + } + } Some('>') => self.consume_and_return(chars, Token::Neq), Some('<') => self.consume_and_return(chars, Token::ShiftLeft), _ => Ok(Some(Token::Lt)), @@ -634,7 +661,7 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -652,7 +679,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::RParen, ]; @@ -724,11 +751,11 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), ]; compare(expected, tokens); @@ -758,7 +785,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -790,7 +817,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -943,12 +970,12 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment\n".to_string(), }), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -975,11 +1002,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( "multi-line\n* /comment".to_string(), )), 
- Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -1046,7 +1073,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), Token::Whitespace(Whitespace::Space), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e7d78f950..ab4aa457b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -92,7 +92,7 @@ fn parse_insert_invalid() { let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected INTO, found: public".to_string()), + ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()), res.unwrap_err() ); } @@ -454,11 +454,11 @@ fn parse_number() { #[cfg(feature = "bigdecimal")] assert_eq!( expr, - Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1))) + Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1), false)) ); #[cfg(not(feature = "bigdecimal"))] - assert_eq!(expr, Expr::Value(Value::Number("1.0".into()))); + assert_eq!(expr, Expr::Value(Value::Number("1.0".into(), false))); } #[test] @@ -894,7 +894,7 @@ fn parse_select_having() { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, - distinct: false + distinct: false, })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1639,18 +1639,6 @@ fn parse_explain_analyze_with_simple_select() { ); } -#[test] -fn parse_simple_analyze() { - let sql = "ANALYZE TABLE t"; - let stmt = verified_stmt(sql); - assert_eq!( - stmt, - Statement::Analyze { - table_name: ObjectName(vec![Ident::new("t")]) - } - ); -} - #[test] fn parse_named_argument_function() { let sql = "SELECT FUN(a => '1', b => '2') FROM foo"; @@ -2390,7 +2378,7 @@ fn parse_ctes() { fn assert_ctes_in_select(expected: &[&str], sel: &Query) { for (i, exp) in expected.iter().enumerate() { - let Cte { alias, query } = &sel.with.as_ref().unwrap().cte_tables[i]; + let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i]; assert_eq!(*exp, query.to_string()); assert_eq!( if i == 0 { @@ -2479,6 +2467,7 @@ fn parse_recursive_cte() { }], }, query: cte_query, + from: None, }; assert_eq!(with.cte_tables.first().unwrap(), &expected); } @@ -2799,6 +2788,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(false, if_exists); assert_eq!(ObjectType::Table, object_type); @@ -2818,6 +2808,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(true, if_exists); assert_eq!(ObjectType::Table, object_type); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs new file mode 100644 index 000000000..585be989b --- /dev/null +++ b/tests/sqlparser_hive.rs @@ -0,0 +1,212 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#![warn(clippy::all)] + +//! Test SQL syntax specific to Hive. The parser based on the generic dialect +//! is also tested (on the inputs it can handle). + +use sqlparser::dialect::HiveDialect; +use sqlparser::test_utils::*; + +#[test] +fn parse_table_create() { + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; + let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; + + hive().verified_stmt(sql); + hive().verified_stmt(iof); +} + +#[test] +fn parse_insert_overwrite() { + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; + hive().verified_stmt(insert_partitions); +} + +#[test] +fn test_truncate() { + let truncate = r#"TRUNCATE TABLE db.table"#; + hive().verified_stmt(truncate); +} + +#[test] +fn parse_analyze() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_analyze_for_columns() { + let analyze = + r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_msck() { + let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + let msck2 = r#"MSCK REPAIR TABLE db.table_name"#; + hive().verified_stmt(msck); + hive().verified_stmt(msck2); +} + +#[test] +fn parse_set() { + let set = "SET HIVEVAR:name = a, b, c_d"; + hive().verified_stmt(set); +} + +#[test] +fn test_spaceship() { + let spaceship = "SELECT * FROM db.table WHERE a <=> b"; + hive().verified_stmt(spaceship); +} + +#[test] +fn parse_with_cte() { + let with = "WITH a AS (SELECT * FROM b) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM b"; + hive().verified_stmt(with); +} + +#[test] +fn drop_table_purge() { + let purge = "DROP TABLE db.table_name PURGE"; + hive().verified_stmt(purge); +} + +#[test] +fn create_table_like() { + let like = "CREATE TABLE db.table_name LIKE db.other_table"; + hive().verified_stmt(like); +} + +// Turning off this test until we can parse identifiers starting with numbers :( +#[test] +fn test_identifier() { + let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; + hive().verified_stmt(between); +} + +#[test] +fn test_alter_partition() { + let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)"; + hive().verified_stmt(alter); +} + +#[test] +fn test_add_partition() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)"; + hive().verified_stmt(add); +} + +#[test] +fn test_drop_partition() { + let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; + hive().verified_stmt(drop); +} + +#[test] +fn test_drop_if_exists() { + let drop = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')"; + hive().verified_stmt(drop); +} + +#[test] +fn test_cluster_by() { + let cluster = "SELECT a FROM db.table CLUSTER BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn test_distribute_by() { + let cluster = "SELECT a FROM db.table 
DISTRIBUTE BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn no_join_condition() { + let join = "SELECT a, b FROM db.table_name JOIN a"; + hive().verified_stmt(join); +} + +#[test] +fn columns_after_partition() { + let query = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table"; + hive().verified_stmt(query); +} + +#[test] +fn long_numerics() { + let query = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#; + hive().verified_stmt(query); +} + +#[test] +fn decimal_precision() { + let query = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table"; + let expected = "SELECT CAST(a AS NUMERIC(18,2)) FROM db.table"; + hive().one_statement_parses_to(query, expected); +} + +#[test] +fn create_temp_table() { + let query = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)"; + let query2 = "CREATE TEMP TABLE db.table (a INT NOT NULL)"; + + hive().verified_stmt(query); + hive().one_statement_parses_to(query2, query); +} + +#[test] +fn create_local_directory() { + let query = + "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + hive().verified_stmt(query); +} + +#[test] +fn lateral_view() { + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; + hive().verified_stmt(view); +} + +#[test] +fn sort_by() { + let sort_by = "SELECT * FROM db.table SORT BY a"; + hive().verified_stmt(sort_by); +} + +#[test] +fn rename_table() { + let rename = "ALTER TABLE db.table_name RENAME TO db.table_2"; + hive().verified_stmt(rename); +} + +#[test] +fn map_access() { + let rename = "SELECT a.b[\"asdf\"] FROM db.table WHERE a = 2"; + hive().verified_stmt(rename); +} + +#[test] +fn from_cte() { + let rename = + "WITH cte AS (SELECT * FROM a.b) FROM cte INSERT INTO TABLE a.b PARTITION (a) SELECT *"; + println!("{}", hive().verified_stmt(rename)); +} + +fn hive() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {})], + } +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 20f186100..2abd8ae9b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -364,8 +364,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); @@ -374,8 +375,11 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(Value::SingleQuotedString("b".into())), + value: vec![SetVariableValue::Literal(Value::SingleQuotedString( + "b".into() + ))], } ); @@ -384,8 +388,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(number("0")), + value: vec![SetVariableValue::Literal(number("0"))], } ); @@ -394,8 +399,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("DEFAULT".into()), + value: vec![SetVariableValue::Ident("DEFAULT".into())], } ); @@ -404,8 +410,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: true, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); From 6f0b2dcd92907c871c0a2654b5962c780c0b9046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sun, 7 Feb 2021 16:06:50 +0100 Subject: [PATCH 12/23] Implement 
SUBSTRING(col [FROM ] [FOR ]) syntax (#293) --- src/ast/mod.rs | 22 ++++++++++++++++++++++ src/parser.rs | 22 ++++++++++++++++++++++ tests/sqlparser_common.rs | 17 +++++++++++++++++ tests/sqlparser_regression.rs | 5 ++--- 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1999451d5..d0e321185 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -201,10 +201,17 @@ pub enum Expr { expr: Box, data_type: DataType, }, + /// EXTRACT(DateTimeField FROM ) Extract { field: DateTimeField, expr: Box, }, + /// SUBSTRING( [FROM ] [FOR ]) + Substring { + expr: Box, + substring_from: Option>, + substring_for: Option>, + }, /// `expr COLLATE collation` Collate { expr: Box, @@ -333,6 +340,21 @@ impl fmt::Display for Expr { Expr::Exists(s) => write!(f, "EXISTS ({})", s), Expr::Subquery(s) => write!(f, "({})", s), Expr::ListAgg(listagg) => write!(f, "{}", listagg), + Expr::Substring { + expr, + substring_from, + substring_for, + } => { + write!(f, "SUBSTRING({}", expr)?; + if let Some(from_part) = substring_from { + write!(f, " FROM {}", from_part)?; + } + if let Some(from_part) = substring_for { + write!(f, " FOR {}", from_part)?; + } + + write!(f, ")") + } } } } diff --git a/src/parser.rs b/src/parser.rs index 7a0b23101..bee671f04 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -350,6 +350,7 @@ impl<'a> Parser<'a> { Keyword::CAST => self.parse_cast_expr(), Keyword::EXISTS => self.parse_exists_expr(), Keyword::EXTRACT => self.parse_extract_expr(), + Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::INTERVAL => self.parse_literal_interval(), Keyword::LISTAGG => self.parse_listagg_expr(), Keyword::NOT => Ok(Expr::UnaryOp { @@ -606,6 +607,27 @@ impl<'a> Parser<'a> { }) } + pub fn parse_substring_expr(&mut self) -> Result { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + let mut to_expr = None; + if self.parse_keyword(Keyword::FROM) { + from_expr = Some(self.parse_expr()?); + } + if self.parse_keyword(Keyword::FOR) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + }) + } + /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. pub fn parse_listagg_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ab4aa457b..f302245ee 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2598,6 +2598,23 @@ fn parse_scalar_subqueries() { ); } +#[test] +fn parse_substring() { + one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); + + one_statement_parses_to( + "SELECT SUBSTRING('1' FROM 1)", + "SELECT SUBSTRING('1' FROM 1)", + ); + + one_statement_parses_to( + "SELECT SUBSTRING('1' FROM 1 FOR 3)", + "SELECT SUBSTRING('1' FROM 1 FOR 3)", + ); + + one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); +} + #[test] fn parse_exists_subquery() { let expected_inner = verified_query("SELECT 1"); diff --git a/tests/sqlparser_regression.rs b/tests/sqlparser_regression.rs index bbf1b2977..1fc35d99c 100644 --- a/tests/sqlparser_regression.rs +++ b/tests/sqlparser_regression.rs @@ -25,10 +25,9 @@ macro_rules! 
tpch_tests { #[test] fn $name() { let dialect = GenericDialect {}; - let res = Parser::parse_sql(&dialect, QUERIES[$value -1]); - // Ignore 6.sql and 22.sql - if $value != 6 && $value != 22 { + // Ignore 6.sql + if $value != 6 { assert!(res.is_ok()); } } From f40955ee82cf2c0c0d411025cdaf62bb7543def8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 8 Feb 2021 08:11:01 +0100 Subject: [PATCH 13/23] Parse floats without leading number (#294) * Parse floats without leading number * Move period token test * Comments * Enable test --- src/tokenizer.rs | 36 ++++++++++++++++++++++++++++++----- tests/sqlparser_regression.rs | 3 --- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fd33f9589..d82810528 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -394,10 +394,21 @@ impl<'a> Tokenizer<'a> { ) } } - // numbers - '0'..='9' => { - // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal - let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); + // numbers and period + '0'..='9' | '.' => { + let mut s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9')); + // match one period + if let Some('.') = chars.peek() { + s.push('.'); + chars.next(); + } + s += &peeking_take_while(chars, |ch| matches!(ch, '0'..='9')); + + // No number -> Token::Period + if s == "." { + return Ok(Some(Token::Period)); + } + let long = if chars.peek() == Some(&'L') { chars.next(); true @@ -470,7 +481,6 @@ impl<'a> Tokenizer<'a> { _ => Ok(Some(Token::Eq)), } } - '.' => self.consume_and_return(chars, Token::Period), '!' => { chars.next(); // consume match chars.peek() { @@ -667,6 +677,22 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_select_float() { + let sql = String::from("SELECT .1"); + let dialect = GenericDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Number(String::from(".1"), false), + ]; + + compare(expected, tokens); + } + #[test] fn tokenize_scalar_function() { let sql = String::from("SELECT sqrt(1)"); diff --git a/tests/sqlparser_regression.rs b/tests/sqlparser_regression.rs index 1fc35d99c..e869e0932 100644 --- a/tests/sqlparser_regression.rs +++ b/tests/sqlparser_regression.rs @@ -26,10 +26,7 @@ macro_rules! tpch_tests { fn $name() { let dialect = GenericDialect {}; let res = Parser::parse_sql(&dialect, QUERIES[$value -1]); - // Ignore 6.sql - if $value != 6 { assert!(res.is_ok()); - } } )* } From 07342d585321e719e7d729bf96661b3dbb1417ab Mon Sep 17 00:00:00 2001 From: Francis Du Date: Wed, 10 Feb 2021 04:03:49 +0800 Subject: [PATCH 14/23] Support parsing multiple show variables. (#290) * feat: support parsing multiple show variables. * fix: fix fmt error --- src/ast/mod.rs | 10 ++++++++-- src/parser.rs | 15 ++++++++++++++- tests/sqlparser_postgres.rs | 8 ++++---- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d0e321185..9a96e34bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -637,7 +637,7 @@ pub enum Statement { /// SHOW /// /// Note: this is a PostgreSQL-specific statement. - ShowVariable { variable: Ident }, + ShowVariable { variable: Vec }, /// SHOW COLUMNS /// /// Note: this is a MySQL-specific statement. 
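
To make the widened variant concrete, a brief illustrative sketch (hypothetical values, not part of the patch) of the AST it now carries:

// "SHOW TRANSACTION ISOLATION LEVEL" can now round-trip as:
// Statement::ShowVariable {
//     variable: vec![Ident::new("TRANSACTION"), Ident::new("ISOLATION"), Ident::new("LEVEL")],
// }
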
@@ -1136,7 +1136,13 @@ impl fmt::Display for Statement { value = display_comma_separated(value) ) } - Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), + Statement::ShowVariable { variable } => { + write!(f, "SHOW")?; + if !variable.is_empty() { + write!(f, " {}", display_separated(variable, " "))?; + } + Ok(()) + } Statement::ShowColumns { extended, full, diff --git a/src/parser.rs b/src/parser.rs index bee671f04..621209fd7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2043,6 +2043,19 @@ impl<'a> Parser<'a> { Ok(ObjectName(idents)) } + /// Parse identifiers + pub fn parse_identifiers(&mut self) -> Result, ParserError> { + let mut idents = vec![]; + loop { + match self.next_token() { + Token::Word(w) => idents.push(w.to_ident()), + Token::EOF => break, + _ => {} + } + } + Ok(idents) + } + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) pub fn parse_identifier(&mut self) -> Result { match self.next_token() { @@ -2439,7 +2452,7 @@ impl<'a> Parser<'a> { self.parse_show_columns() } else { Ok(Statement::ShowVariable { - variable: self.parse_identifier()?, + variable: self.parse_identifiers()?, }) } } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2abd8ae9b..bcfce30fc 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -443,19 +443,19 @@ fn parse_set() { #[test] fn parse_show() { - let stmt = pg_and_generic().verified_stmt("SHOW a"); + let stmt = pg_and_generic().verified_stmt("SHOW a a"); assert_eq!( stmt, Statement::ShowVariable { - variable: "a".into() + variable: vec!["a".into(), "a".into()] } ); - let stmt = pg_and_generic().verified_stmt("SHOW ALL"); + let stmt = pg_and_generic().verified_stmt("SHOW ALL ALL"); assert_eq!( stmt, Statement::ShowVariable { - variable: "ALL".into() + variable: vec!["ALL".into(), "ALL".into()] } ) } From add8991144fa9b3bc259ae566034474e34b395e2 Mon Sep 17 00:00:00 2001 From: zhangli-pear <57003334+zhangli-pear@users.noreply.github.com> Date: Wed, 10 Feb 2021 04:04:54 +0800 Subject: [PATCH 15/23] feat: support sqlite insert or statement (#281) --- src/ast/mod.rs | 48 +++++++++++++++++++++++++++++++++------ src/dialect/keywords.rs | 3 +++ src/parser.rs | 22 ++++++++++++++++++ tests/sqlparser_common.rs | 41 +++++++++++++++++++++++++++++++-- 4 files changed, 105 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9a96e34bc..94951693c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -508,6 +508,8 @@ pub enum Statement { Query(Box), /// INSERT Insert { + /// Only for Sqlite + or: Option, /// TABLE table_name: ObjectName, /// COLUMNS @@ -804,6 +806,7 @@ impl fmt::Display for Statement { Ok(()) } Statement::Insert { + or, table_name, overwrite, partitioned, @@ -812,13 +815,17 @@ impl fmt::Display for Statement { source, table, } => { - write!( - f, - "INSERT {act}{tbl} {table_name} ", - table_name = table_name, - act = if *overwrite { "OVERWRITE" } else { "INTO" }, - tbl = if *table { " TABLE" } else { "" } - )?; + if let Some(action) = or { + write!(f, "INSERT OR {} INTO {} ", action, table_name)?; + } else { + write!( + f, + "INSERT {act}{tbl} {table_name} ", + table_name = table_name, + act = if *overwrite { "OVERWRITE" } else { "INTO" }, + tbl = if *table { " TABLE" } else { "" } + )?; + } if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } @@ -832,6 +839,7 @@ impl fmt::Display for Statement { } write!(f, "{}", source) } + Statement::Copy { table_name, columns, @@ -1560,3 +1568,29 @@ impl 
fmt::Display for SetVariableValue { } } } + +/// Sqlite specific syntax +/// +/// https://sqlite.org/lang_conflict.html +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum SqliteOnConflict { + Rollback, + Abort, + Fail, + Ignore, + Replace, +} + +impl fmt::Display for SqliteOnConflict { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use SqliteOnConflict::*; + match self { + Rollback => write!(f, "ROLLBACK"), + Abort => write!(f, "ABORT"), + Fail => write!(f, "FAIL"), + Ignore => write!(f, "IGNORE"), + Replace => write!(f, "REPLACE"), + } + } +} diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 306cd19d6..1d2690fc0 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -66,6 +66,7 @@ macro_rules! define_keywords { // The following keywords should be sorted to be able to match using binary search define_keywords!( + ABORT, ABS, ACTION, ADD, @@ -202,6 +203,7 @@ define_keywords!( EXTENDED, EXTERNAL, EXTRACT, + FAIL, FALSE, FETCH, FIELDS, @@ -233,6 +235,7 @@ define_keywords!( HOUR, IDENTITY, IF, + IGNORE, IN, INDEX, INDICATOR, diff --git a/src/parser.rs b/src/parser.rs index 621209fd7..eab2ece12 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -163,6 +163,10 @@ impl<'a> Parser<'a> { Keyword::DEALLOCATE => Ok(self.parse_deallocate()?), Keyword::EXECUTE => Ok(self.parse_execute()?), Keyword::PREPARE => Ok(self.parse_prepare()?), + Keyword::REPLACE if dialect_of!(self is SQLiteDialect ) => { + self.prev_token(); + Ok(self.parse_insert()?) + } _ => self.expected("an SQL statement", Token::Word(w)), }, Token::LParen => { @@ -2719,6 +2723,23 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { + let or = if !dialect_of!(self is SQLiteDialect) { + None + } else if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { + Some(SqliteOnConflict::Replace) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { + Some(SqliteOnConflict::Rollback) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { + Some(SqliteOnConflict::Abort) + } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { + Some(SqliteOnConflict::Fail) + } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { + Some(SqliteOnConflict::Ignore) + } else if self.parse_keyword(Keyword::REPLACE) { + Some(SqliteOnConflict::Replace) + } else { + None + }; let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; let overwrite = action == Keyword::OVERWRITE; let local = self.parse_keyword(Keyword::LOCAL); @@ -2758,6 +2779,7 @@ impl<'a> Parser<'a> { let source = Box::new(self.parse_query()?); Ok(Statement::Insert { + or, table_name, overwrite, partitioned, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f302245ee..fbf2faf9b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -24,8 +24,8 @@ use test_utils::{all_dialects, expr_from_projection, join, number, only, table, use matches::assert_matches; use sqlparser::ast::*; -use sqlparser::dialect::keywords::ALL_KEYWORDS; -use sqlparser::parser::ParserError; +use sqlparser::dialect::{keywords::ALL_KEYWORDS, SQLiteDialect}; +use sqlparser::parser::{Parser, ParserError}; #[test] fn parse_insert_values() { @@ -97,6 +97,43 @@ fn parse_insert_invalid() { ); } +#[test] +fn parse_insert_sqlite() { + let dialect = SQLiteDialect {}; + + let check = |sql: &str, expected_action: Option| match Parser::parse_sql( + &dialect, &sql, + ) + .unwrap() + 
.pop()
+ .unwrap()
+ {
+ Statement::Insert { or, .. } => assert_eq!(or, expected_action),
+ _ => panic!(sql.to_string()),
+ };
+
+ let sql = "INSERT INTO test_table(id) VALUES(1)";
+ check(sql, None);
+
+ let sql = "REPLACE INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Replace));
+
+ let sql = "INSERT OR REPLACE INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Replace));
+
+ let sql = "INSERT OR ROLLBACK INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Rollback));
+
+ let sql = "INSERT OR ABORT INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Abort));
+
+ let sql = "INSERT OR FAIL INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Fail));
+
+ let sql = "INSERT OR IGNORE INTO test_table(id) VALUES(1)";
+ check(sql, Some(SqliteOnConflict::Ignore));
+}
+
 #[test]
 fn parse_update() {
     let sql = "UPDATE t SET a = 1, b = 2, c = 3 WHERE d";

From a868ff6ebe7102af51e9f0e64c3afa521a74649d Mon Sep 17 00:00:00 2001
From: "Heres, Daniel"
Date: Tue, 9 Feb 2021 21:30:05 +0100
Subject: [PATCH 16/23] Add release notes

---
 CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c1c5a763f..0701b23dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,16 @@ Given that the parser produces a typed AST, any changes to the AST will technica
 ## [Unreleased]
 Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes.
+
+## [0.8.0] 2021-02-20
+
+### Added
+* Introduce Hive QL dialect `HiveDialect` and syntax (#235) - Thanks @hntd187!
+* Add `SUBSTRING(col [FROM <expr>] [FOR <expr>])` syntax (#293)
+* Support parsing floats without leading digits `.01` (#294)
+* Support parsing multiple show variables (#290) - Thanks @francis-du!
+* Support SQLite `INSERT OR [..]` syntax (#281) - Thanks @zhangli-pear!
+ ## [0.7.0] 2020-12-28 ### Changed From 34cd794cd3ce2578aa2e2474a49146ddcc686d2c Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 9 Feb 2021 21:31:27 +0100 Subject: [PATCH 17/23] (cargo-release) version 0.8.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 231d7fee5..4b1b6c15c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.7.1-alpha.0" +version = "0.8.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 43fef23bc8039b3849c1328c99741875bf76fe54 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 9 Feb 2021 21:31:27 +0100 Subject: [PATCH 18/23] (cargo-release) start next development iteration 0.8.1-alpha.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4b1b6c15c..8d4ce5b0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.8.0" +version = "0.8.1-alpha.0" authors = ["Andy Grove "] homepage = "https://github.com/ballista-compute/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From e6e37b47db249e9b59787d25d813776846080573 Mon Sep 17 00:00:00 2001 From: Mike Seddon Date: Mon, 22 Mar 2021 09:26:16 +1100 Subject: [PATCH 19/23] Implement TRY_CAST (#299) Adds support for `TRY_CAST` and fixes a clippy error --- src/ast/mod.rs | 7 +++++++ src/dialect/keywords.rs | 1 + src/parser.rs | 27 ++++++++++++++++++++------- src/tokenizer.rs | 1 + tests/sqlparser_common.rs | 30 ++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 94951693c..5f8eec8bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -201,6 +201,12 @@ pub enum Expr { expr: Box, data_type: DataType, }, + /// TRY_CAST an expression to a different data type e.g. 
`TRY_CAST(foo AS VARCHAR(123))` + // this differs from CAST in the choice of how to implement invalid conversions + TryCast { + expr: Box, + data_type: DataType, + }, /// EXTRACT(DateTimeField FROM ) Extract { field: DateTimeField, @@ -309,6 +315,7 @@ impl fmt::Display for Expr { } } Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type), + Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({} AS {})", expr, data_type), Expr::Extract { field, expr } => write!(f, "EXTRACT({} FROM {})", field, expr), Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation), Expr::Nested(ast) => write!(f, "({})", ast), diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 1d2690fc0..3371ff570 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -456,6 +456,7 @@ define_keywords!( TRIM_ARRAY, TRUE, TRUNCATE, + TRY_CAST, UESCAPE, UNBOUNDED, UNCOMMITTED, diff --git a/src/parser.rs b/src/parser.rs index eab2ece12..bacae7873 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -352,6 +352,7 @@ impl<'a> Parser<'a> { } Keyword::CASE => self.parse_case_expr(), Keyword::CAST => self.parse_cast_expr(), + Keyword::TRY_CAST => self.parse_try_cast_expr(), Keyword::EXISTS => self.parse_exists_expr(), Keyword::EXTRACT => self.parse_extract_expr(), Keyword::SUBSTRING => self.parse_substring_expr(), @@ -591,6 +592,19 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)` + pub fn parse_try_cast_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::TryCast { + expr: Box::new(expr), + data_type, + }) + } + /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. 
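 /// (The EXISTS keyword itself has already been consumed by the keyword
 /// dispatch in the caller before this function is entered.)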
pub fn parse_exists_expr(&mut self) -> Result<Expr, ParserError> {
    self.expect_token(&Token::LParen)?;
@@ -1806,7 +1820,7 @@ impl<'a> Parser<'a> {
         let columns = self.parse_parenthesized_column_list(Optional)?;
         self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?;
         self.expect_token(&Token::SemiColon)?;
-        let values = self.parse_tsv()?;
+        let values = self.parse_tsv();
         Ok(Statement::Copy {
             table_name,
             columns,
@@ -1816,12 +1830,11 @@
     /// Parse a tab separated values in
     /// COPY payload
-    fn parse_tsv(&mut self) -> Result<Vec<Vec<Option<String>>>, ParserError> {
-        let values = self.parse_tab_value()?;
-        Ok(values)
+    fn parse_tsv(&mut self) -> Vec<Vec<Option<String>>> {
+        self.parse_tab_value()
     }
-    fn parse_tab_value(&mut self) -> Result<Vec<Vec<Option<String>>>, ParserError> {
+    fn parse_tab_value(&mut self) -> Vec<Vec<Option<String>>> {
         let mut values = vec![];
         let mut content = String::from("");
         while let Some(t) = self.next_token_no_skip() {
@@ -1836,7 +1849,7 @@
             }
             Token::Backslash => {
                 if self.consume_token(&Token::Period) {
-                    return Ok(values);
+                    return values;
                 }
                 if let Token::Word(w) = self.next_token() {
                     if w.value == "N" {
@@ -1849,7 +1862,7 @@
             }
         }
     }
-    Ok(values)
+    values
 }

 /// Parse a literal value (numbers, strings, date/time, booleans)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d82810528..ce48fa018 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -626,6 +626,7 @@ impl<'a> Tokenizer<'a> {
     }
 }

+    #[allow(clippy::unnecessary_wraps)]
     fn consume_and_return(
         &self,
         chars: &mut Peekable<Chars<'_>>,
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index fbf2faf9b..10ac79d84 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -981,6 +981,35 @@ fn parse_cast() {
     );
 }

+#[test]
+fn parse_try_cast() {
+    let sql = "SELECT TRY_CAST(id AS BIGINT) FROM customer";
+    let select = verified_only_select(sql);
+    assert_eq!(
+        &Expr::TryCast {
+            expr: Box::new(Expr::Identifier(Ident::new("id"))),
+            data_type: DataType::BigInt
+        },
+        expr_from_projection(only(&select.projection))
+    );
+    one_statement_parses_to(
+        "SELECT TRY_CAST(id AS BIGINT) FROM customer",
+        "SELECT TRY_CAST(id AS BIGINT) FROM customer",
+    );
+
+    verified_stmt("SELECT TRY_CAST(id AS NUMERIC) FROM customer");
+
+    one_statement_parses_to(
+        "SELECT TRY_CAST(id AS DEC) FROM customer",
+        "SELECT TRY_CAST(id AS NUMERIC) FROM customer",
+    );
+
+    one_statement_parses_to(
+        "SELECT TRY_CAST(id AS DECIMAL) FROM customer",
+        "SELECT TRY_CAST(id AS NUMERIC) FROM customer",
+    );
+}
+
 #[test]
 fn parse_extract() {
     let sql = "SELECT EXTRACT(YEAR FROM d)";
@@ -1224,6 +1253,7 @@ fn parse_assert() {
 }

 #[test]
+#[allow(clippy::collapsible_match)]
 fn parse_assert_message() {
     let sql = "ASSERT (SELECT COUNT(*) FROM my_table) > 0 AS 'No rows in my_table'";
     let ast = one_statement_parses_to(

From c2340d182189f7c0b001ddc0947bf8d6c69f0eaa Mon Sep 17 00:00:00 2001
From: "Heres, Daniel"
Date: Sun, 21 Mar 2021 23:28:22 +0100
Subject: [PATCH 20/23] Add release notes for 0.9.0

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0701b23dd..bcf1ab6a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,11 @@ Given that the parser produces a typed AST, any changes to the AST will technica
 Check https://github.com/ballista-compute/sqlparser-rs/commits/main for undocumented changes.
+## [0.9.0] 2021-03-21
+
+### Added
+* Add support for `TRY_CAST` syntax (#299) - Thanks @seddonm1!
+
 ## [0.8.0] 2021-02-20
 
 ### Added

From f52891d0da5eb6655e45e654fa5d70fd6f4e7cc7 Mon Sep 17 00:00:00 2001
From: "Heres, Daniel"
Date: Sun, 21 Mar 2021 23:29:18 +0100
Subject: [PATCH 21/23] (cargo-release) version 0.9.0

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 8d4ce5b0d..e086c2e03 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "sqlparser"
 description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011"
-version = "0.8.1-alpha.0"
+version = "0.9.0"
 authors = ["Andy Grove "]
 homepage = "https://github.com/ballista-compute/sqlparser-rs"
 documentation = "https://docs.rs/sqlparser/"

From 1e87ab8e226d6aca4b66de5d7da0c853d98e9271 Mon Sep 17 00:00:00 2001
From: "Heres, Daniel"
Date: Sun, 21 Mar 2021 23:29:18 +0100
Subject: [PATCH 22/23] (cargo-release) start next development iteration 0.9.1-alpha.0

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index e086c2e03..e0ee25ff0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "sqlparser"
 description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011"
-version = "0.9.0"
+version = "0.9.1-alpha.0"
 authors = ["Andy Grove "]
 homepage = "https://github.com/ballista-compute/sqlparser-rs"
 documentation = "https://docs.rs/sqlparser/"

From a9e6f77d623724379c933ace1e3033390f1873ff Mon Sep 17 00:00:00 2001
From: Max Countryman
Date: Sat, 20 Mar 2021 08:54:19 -0700
Subject: [PATCH 23/23] provide ILIKE support

This introduces support for ILIKE and NOT ILIKE. ILIKE is the
case-insensitive variant of LIKE. Systems such as Postgres, Redshift,
and Snowflake provide this variant.[1][2][3]

[1] https://www.postgresql.org/docs/7.3/functions-matching.html
[2] https://docs.aws.amazon.com/redshift/latest/dg/r_patternmatching_condition_like.html
[3] https://docs.snowflake.com/en/sql-reference/functions/ilike.html
---
 src/ast/mod.rs            |  2 ++
 src/ast/operator.rs       |  4 ++++
 src/dialect/keywords.rs   |  1 +
 src/parser.rs             | 11 +++++++++-
 tests/sqlparser_common.rs | 45 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 5f8eec8bc..462dae516 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -1546,6 +1546,7 @@ impl fmt::Display for TransactionIsolationLevel {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum ShowStatementFilter {
     Like(String),
+    ILike(String),
     Where(Expr),
 }
 
@@ -1554,6 +1555,7 @@ impl fmt::Display for ShowStatementFilter {
         use ShowStatementFilter::*;
         match self {
             Like(pattern) => write!(f, "LIKE '{}'", value::escape_single_quote_string(pattern)),
+            ILike(pattern) => write!(f, "ILIKE '{}'", value::escape_single_quote_string(pattern)),
             Where(expr) => write!(f, "WHERE {}", expr),
         }
     }
diff --git a/src/ast/operator.rs b/src/ast/operator.rs
index 732c81232..ff978fb97 100644
--- a/src/ast/operator.rs
+++ b/src/ast/operator.rs
@@ -72,6 +72,8 @@ pub enum BinaryOperator {
     Or,
     Like,
     NotLike,
+    ILike,
+    NotILike,
     BitwiseOr,
     BitwiseAnd,
     BitwiseXor,
@@ -100,6 +102,8 @@ impl fmt::Display for BinaryOperator {
             BinaryOperator::Or => "OR",
             BinaryOperator::Like => "LIKE",
             BinaryOperator::NotLike => "NOT LIKE",
+            BinaryOperator::ILike => "ILIKE",
+            BinaryOperator::NotILike => "NOT ILIKE",
             BinaryOperator::BitwiseOr => "|",
             BinaryOperator::BitwiseAnd => "&",
             BinaryOperator::BitwiseXor => "^",
diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs
index 3371ff570..8b88496b6 100644
--- a/src/dialect/keywords.rs
+++ b/src/dialect/keywords.rs
@@ -236,6 +236,7 @@ define_keywords!(
     IDENTITY,
     IF,
     IGNORE,
+    ILIKE,
     IN,
     INDEX,
     INDICATOR,
diff --git a/src/parser.rs b/src/parser.rs
index bacae7873..863fc66d0 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -839,9 +839,12 @@ impl<'a> Parser<'a> {
             Keyword::AND => Some(BinaryOperator::And),
             Keyword::OR => Some(BinaryOperator::Or),
             Keyword::LIKE => Some(BinaryOperator::Like),
+            Keyword::ILIKE => Some(BinaryOperator::ILike),
             Keyword::NOT => {
                 if self.parse_keyword(Keyword::LIKE) {
                     Some(BinaryOperator::NotLike)
+                } else if self.parse_keyword(Keyword::ILIKE) {
+                    Some(BinaryOperator::NotILike)
                 } else {
                     None
                 }
@@ -975,12 +978,14 @@ impl<'a> Parser<'a> {
                 Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
                 Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
                 Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
+                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC),
                 _ => Ok(0),
             },
             Token::Word(w) if w.keyword == Keyword::IS => Ok(17),
             Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
             Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
             Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC),
             Token::Eq
             | Token::Lt
             | Token::LtEq
@@ -1472,7 +1477,7 @@ impl<'a> Parser<'a> {
     ) -> Result<Statement, ParserError> {
         let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
         let table_name = self.parse_object_name()?;
-        let like = if self.parse_keyword(Keyword::LIKE) {
+        let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) {
             self.parse_object_name().ok()
         } else {
             None
@@ -2497,6 +2502,10 @@ impl<'a> Parser<'a> {
             Ok(Some(ShowStatementFilter::Like(
                 self.parse_literal_string()?,
             )))
+        } else if self.parse_keyword(Keyword::ILIKE) {
+            Ok(Some(ShowStatementFilter::ILike(
+                self.parse_literal_string()?,
+            )))
         } else if self.parse_keyword(Keyword::WHERE) {
             Ok(Some(ShowStatementFilter::Where(self.parse_expr()?)))
         } else {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 10ac79d84..e43bd12ce 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -687,6 +687,51 @@ fn parse_like() {
     chk(true);
 }
 
+#[test]
+fn parse_ilike() {
+    fn chk(negated: bool) {
+        let sql = &format!(
+            "SELECT * FROM customers WHERE name {}ILIKE '%a'",
+            if negated { "NOT " } else { "" }
+        );
+        let select = verified_only_select(sql);
+        assert_eq!(
+            Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Ident::new("name"))),
+                op: if negated {
+                    BinaryOperator::NotILike
+                } else {
+                    BinaryOperator::ILike
+                },
+                right: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))),
+            },
+            select.selection.unwrap()
+        );
+
+        // This statement tests that ILIKE and NOT ILIKE have the same precedence.
+        // This was previously mishandled (#81).
+        let sql = &format!(
+            "SELECT * FROM customers WHERE name {}ILIKE '%a' IS NULL",
+            if negated { "NOT " } else { "" }
+        );
+        let select = verified_only_select(sql);
+        assert_eq!(
+            Expr::IsNull(Box::new(Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Ident::new("name"))),
+                op: if negated {
+                    BinaryOperator::NotILike
+                } else {
+                    BinaryOperator::ILike
+                },
+                right: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))),
+            })),
+            select.selection.unwrap()
+        );
+    }
+    chk(false);
+    chk(true);
+}
+
 #[test]
 fn parse_in_list() {
     fn chk(negated: bool) {
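Because ILIKE is wired in as a plain `BinaryOperator` at the same precedence as LIKE, downstream consumers can pattern-match it like any other comparison. A minimal sketch, assuming the 0.9-era AST shape (`Query::body` as a bare `SetExpr` and `Select::selection` as `Option<Expr>`; later releases box some of these fields):

    use sqlparser::ast::{BinaryOperator, Expr, SetExpr, Statement};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT * FROM customers WHERE name ILIKE '%a'";
        // Assumed 0.9-era API: parse_sql takes &dyn Dialect and &str.
        let statements =
            Parser::parse_sql(&GenericDialect {}, sql).expect("ILIKE should parse");

        // Drill down to the WHERE clause: it is a BinaryOp carrying
        // the new ILike operator.
        if let Statement::Query(query) = &statements[0] {
            if let SetExpr::Select(select) = &query.body {
                match &select.selection {
                    Some(Expr::BinaryOp { op, .. }) => assert_eq!(op, &BinaryOperator::ILike),
                    other => panic!("unexpected WHERE clause: {:?}", other),
                }
            }
        }
    }

Note that `NOT ILIKE` arrives as its own `BinaryOperator::NotILike` variant rather than an `ILike` wrapped in a NOT, mirroring how NOT LIKE is represented.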